[llvm] [RISCV] Support RVV register overlapping constraints (PR #145004)

Piyou Chen via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 20 02:37:53 PDT 2025


https://github.com/BeMg created https://github.com/llvm/llvm-project/pull/145004

Relates to https://github.com/llvm/llvm-project/issues/144289

This PR demonstrates how we can model the RVV register overlapping constraints directly instead of approximating them with early-clobber, and how doing so affects the codegen results.

I don't expect this to be the final version, but it provides a proof of concept for measuring the performance gain (reduced register pressure) from implementing the RVV register overlapping constraints.
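
For reference, the two overlap rules the patch models come from the RVV spec. Below is a minimal standalone sketch (my own naming; the logic mirrors the isRVVConstraintsType2/isRVVConstraintsType3 checks added in the patch), exercised with the spec examples quoted in the patch comments. Each function returns true when a destination/source pairing violates the overlap rule:

  #include <cassert>

  // Type 2 (narrowing): destination EEW < source EEW. Overlap is only legal
  // in the lowest-numbered part of the source register group.
  static bool violatesType2(unsigned DstIdx, unsigned DstLMUL,
                            unsigned SrcIdx, unsigned SrcLMUL) {
    (void)DstLMUL; // Unused here; kept to mirror the patch's signature.
    if (DstIdx == SrcIdx)
      return false; // Overlap at the lowest-numbered part is allowed.
    return SrcIdx < DstIdx && DstIdx < SrcIdx + SrcLMUL;
  }

  // Type 3 (widening): destination EEW > source EEW. Overlap is only legal
  // in the highest-numbered part of the destination register group.
  static bool violatesType3(unsigned DstIdx, unsigned DstLMUL,
                            unsigned SrcIdx, unsigned SrcLMUL) {
    if (DstLMUL == SrcLMUL && DstIdx == SrcIdx)
      return true;
    if (SrcIdx == DstIdx + DstLMUL - SrcLMUL)
      return false; // Source sits in the highest-numbered part.
    return DstIdx <= SrcIdx && SrcIdx < DstIdx + DstLMUL - SrcLMUL;
  }

  int main() {
    // vnsrl.wi v0, v0, 3 at LMUL=1: dst v0 (LMUL 1), src v0 (EMUL 2): legal.
    assert(!violatesType2(0, 1, 0, 2));
    // The same narrowing with dst v1 overlaps the upper half of v0v1: illegal.
    assert(violatesType2(1, 1, 0, 2));
    // vzext.vf4 v0, v6 at LMUL=8: src v6 (EMUL 2) is the highest-numbered
    // part of the dst group v0..v7: legal.
    assert(!violatesType3(0, 8, 6, 2));
    // A source of v4 overlaps a lower part of the destination group: illegal.
    assert(violatesType3(0, 8, 4, 2));
  }

Early-clobber conservatively forbids all of these overlaps; modeling the rules directly lets the allocator keep the legal ones and reuse registers.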

From 7ef13811608d68018d051a4a95329c5d084e596a Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen at sifive.com>
Date: Thu, 19 Jun 2025 03:38:39 -0700
Subject: [PATCH 1/2] [RISCV] Support overlapping constraints

---
 llvm/include/llvm/CodeGen/LiveRegMatrix.h     |   7 +
 .../include/llvm/CodeGen/TargetRegisterInfo.h |  16 ++
 llvm/lib/CodeGen/LiveRegMatrix.cpp            |  71 +++++-
 llvm/lib/CodeGen/MachineCopyPropagation.cpp   |  11 +
 llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp  |   8 +-
 llvm/lib/CodeGen/RegAllocGreedy.cpp           |   4 +
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      |  83 +++++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.h        |   3 +
 .../Target/RISCV/RISCVInstrInfoVPseudos.td    |   3 +-
 llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp   | 225 ++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVRegisterInfo.h     |  18 ++
 11 files changed, 443 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
index 0bc243271bb73..a6d13d9f534d0 100644
--- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h
+++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
@@ -41,6 +41,7 @@ class LiveRegMatrix {
   friend class LiveRegMatrixWrapperLegacy;
   friend class LiveRegMatrixAnalysis;
   const TargetRegisterInfo *TRI = nullptr;
+  const MachineRegisterInfo *MRI = nullptr;
   LiveIntervals *LIS = nullptr;
   VirtRegMap *VRM = nullptr;
 
@@ -108,6 +109,12 @@ class LiveRegMatrix {
   InterferenceKind checkInterference(const LiveInterval &VirtReg,
                                      MCRegister PhysReg);
 
+  /// Collect the live intervals that conflict with assigning VirtReg to
+  /// PhysReg according to the target's overlap constraints.
+  SmallVector<const LiveInterval *, 8>
+  getTargetInterferenceLiveI(const LiveInterval &VirtReg,
+                             MCRegister PhysReg) const;
+
   /// Check for interference in the segment [Start, End) that may prevent
   /// assignment to PhysReg. If this function returns true, there is
   /// interference in the segment [Start, End) of some other interval already
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 8b9ed78a8e970..7cfad070545d1 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -16,6 +16,7 @@
 #define LLVM_CODEGEN_TARGETREGISTERINFO_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
@@ -42,6 +43,7 @@ class RegScavenger;
 class VirtRegMap;
 class LiveIntervals;
 class LiveInterval;
+class LiveRange;
 class TargetRegisterClass {
 public:
   using iterator = const MCPhysReg *;
@@ -1230,6 +1232,20 @@ class LLVM_ABI TargetRegisterInfo : public MCRegisterInfo {
     return false;
   }
 
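+  /// Return true if the target provides extra, target-specific interference
+  /// checks for the register allocator (RISC-V uses this to model the RVV
+  /// register group overlap constraints).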
+  virtual bool enableTargetInterference() const { return false; }
+
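+  /// Return the physical registers that would conflict with assigning VirtReg
+  /// to PhysReg under the target's overlap constraints.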
+  virtual BitVector getTargetInterferenceReg(const LiveInterval &VirtReg,
+                                             MCRegister PhysReg,
+                                             const MachineRegisterInfo *MRI,
+                                             const VirtRegMap *VRM) const {
+    return BitVector();
+  }
+
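+  /// Rewrite early-clobber segment starts in NewLR (a copy of LR) to regular
+  /// slots when the defining instruction's overlap constraints are modeled
+  /// precisely; return true if anything changed.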
+  virtual bool needUpdateECSlot(const LiveRange &LR, LiveRange &NewLR,
+                                const LiveIntervals &LIS) const {
+    return false;
+  }
+
   /// Some targets delay assigning the frame until late and use a placeholder
   /// to represent it earlier. This method can be used to identify the frame
   /// register placeholder.
diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp
index cfda262aac82d..64e1695e9b62e 100644
--- a/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -60,6 +60,7 @@ bool LiveRegMatrixWrapperLegacy::runOnMachineFunction(MachineFunction &MF) {
 void LiveRegMatrix::init(MachineFunction &MF, LiveIntervals &pLIS,
                          VirtRegMap &pVRM) {
   TRI = MF.getSubtarget().getRegisterInfo();
+  MRI = &MF.getRegInfo();
   LIS = &pLIS;
   VRM = &pVRM;
 
@@ -83,6 +84,14 @@ void LiveRegMatrix::releaseMemory() {
   }
 }
 
+static LiveRange copyLiveRange(const LiveRange &LR) {
+  LiveRange NewLR;
+  for (auto Seg : LR.segments)
+    NewLR.addSegment(Seg);
+  NewLR.valnos.append(LR.vni_begin(), LR.vni_end());
+  return NewLR;
+}
+
 template <typename Callable>
 static bool foreachUnit(const TargetRegisterInfo *TRI,
                         const LiveInterval &VRegInterval, MCRegister PhysReg,
@@ -117,7 +126,12 @@ void LiveRegMatrix::assign(const LiveInterval &VirtReg, MCRegister PhysReg) {
   foreachUnit(
       TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) {
         LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << ' ' << Range);
-        Matrix[Unit].unify(VirtReg, Range);
+        LiveRange NewLR;
+        if (TRI->enableTargetInterference() &&
+            TRI->needUpdateECSlot(Range, NewLR = copyLiveRange(Range), *LIS))
+          Matrix[Unit].unify(VirtReg, NewLR);
+        else
+          Matrix[Unit].unify(VirtReg, Range);
         return false;
       });
 
@@ -134,7 +148,13 @@ void LiveRegMatrix::unassign(const LiveInterval &VirtReg) {
   foreachUnit(TRI, VirtReg, PhysReg,
               [&](unsigned Unit, const LiveRange &Range) {
                 LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI));
-                Matrix[Unit].extract(VirtReg, Range);
+                LiveRange NewLR;
+                if (TRI->enableTargetInterference() &&
+                    TRI->needUpdateECSlot(Range, NewLR = copyLiveRange(Range),
+                                          *LIS))
+                  Matrix[Unit].extract(VirtReg, NewLR);
+                else
+                  Matrix[Unit].extract(VirtReg, Range);
                 return false;
               });
 
@@ -150,6 +170,31 @@ bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const {
   return false;
 }
 
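+// Collect the already-assigned live intervals that the target's overlap
+// constraints report as conflicting with assigning VirtReg to PhysReg.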
+SmallVector<const LiveInterval *, 8>
+LiveRegMatrix::getTargetInterferenceLiveI(const LiveInterval &VirtReg,
+                                          MCRegister PhysReg) const {
+  SmallVector<const LiveInterval *, 8> LiveIs;
+  if (!TRI->enableTargetInterference())
+    return LiveIs;
+
+  BitVector IntfReg = TRI->getTargetInterferenceReg(VirtReg, PhysReg, MRI, VRM);
+
+  for (auto Reg : IntfReg.set_bits()) {
+    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+      const LiveIntervalUnion &LiveUnion = Matrix[*Units];
+      LiveIntervalUnion::ConstSegmentIter LiveUnionI = LiveUnion.begin();
+      while (LiveUnionI != LiveUnion.end()) {
+        const LiveRange *LiveR = *LiveUnionI;
+        LiveRange NewLiveR = copyLiveRange(*LiveR);
+        if (TRI->needUpdateECSlot(*LiveR, NewLiveR, *LIS))
+          LiveIs.push_back(*LiveUnionI);
+        ++LiveUnionI;
+      }
+    }
+  }
+  return LiveIs;
+}
+
 bool LiveRegMatrix::checkRegMaskInterference(const LiveInterval &VirtReg,
                                              MCRegister PhysReg) {
   // Check if the cached information is valid.
@@ -178,7 +223,12 @@ bool LiveRegMatrix::checkRegUnitInterference(const LiveInterval &VirtReg,
   bool Result = foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
                                                        const LiveRange &Range) {
     const LiveRange &UnitRange = LIS->getRegUnit(Unit);
-    return Range.overlaps(UnitRange, CP, *LIS->getSlotIndexes());
+    LiveRange NewLR;
+    if (TRI->enableTargetInterference() &&
+        TRI->needUpdateECSlot(Range, NewLR = copyLiveRange(Range), *LIS))
+      return NewLR.overlaps(UnitRange, CP, *LIS->getSlotIndexes());
+    return Range.overlaps(UnitRange, CP, *LIS->getSlotIndexes());
   });
   return Result;
 }
@@ -204,9 +254,24 @@ LiveRegMatrix::checkInterference(const LiveInterval &VirtReg,
   if (checkRegUnitInterference(VirtReg, PhysReg))
     return IK_RegUnit;
 
+  if (TRI->enableTargetInterference() &&
+      TRI->getTargetInterferenceReg(VirtReg, PhysReg, MRI, VRM).any())
+    return IK_VirtReg;
+
   // Check the matrix for virtual register interference.
   bool Interference = foreachUnit(TRI, VirtReg, PhysReg,
                                   [&](MCRegUnit Unit, const LiveRange &LR) {
+                                    LiveRange NewLR;
+                                    if (TRI->enableTargetInterference() &&
+                                        TRI->needUpdateECSlot(
+                                            LR, NewLR = copyLiveRange(LR), *LIS)) {
+                                      // Updating the LiveRange can make the
+                                      // cached information stale. Refresh the
+                                      // cache to handle it.
+                                      invalidateVirtRegs();
+                                      return query(NewLR, Unit)
+                                          .checkInterference();
+                                    }
                                     return query(LR, Unit).checkInterference();
                                   });
   if (Interference)
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 742de1101faa2..8cf11992963ba 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -921,6 +921,17 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
         // later.
         if (MO.isTied())
           ReadRegister(Reg, MI, RegularUse);
+        // If the register is also read by this instruction, record the use so
+        // the copy that defines it is not deleted.
+        for (const MachineOperand &UseMO : MI.uses()) {
+          if (!UseMO.isReg())
+            continue;
+          MCRegister UseReg = UseMO.getReg().asMCReg();
+          if (UseReg == MCRegister::NoRegister)
+            continue;
+          if (!TRI->regsOverlap(Reg, UseReg))
+            continue;
+          ReadRegister(UseReg, MI, RegularUse);
+        }
         Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
       }
 
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index 30523611977f4..bc4b05201b66a 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -262,11 +262,15 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
     // If there is 10 or more interferences, chances are one is heavier.
     const auto &Interferences = Q.interferingVRegs(EvictInterferenceCutoff);
-    if (Interferences.size() >= EvictInterferenceCutoff)
+    SmallVector<const LiveInterval *, 8> Intfs =
+        Matrix->getTargetInterferenceLiveI(VirtReg, PhysReg);
+    Intfs.append(Interferences.begin(), Interferences.end());
+
+    if (Intfs.size() >= EvictInterferenceCutoff)
       return false;
 
     // Check if any interfering live range is heavier than MaxWeight.
-    for (const LiveInterval *Intf : reverse(Interferences)) {
+    for (const LiveInterval *Intf : reverse(Intfs)) {
       assert(Intf->reg().isVirtual() &&
              "Only expecting virtual register interference from query");
 
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index fa384b296f2e6..2173cb4aa7da2 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -635,6 +635,10 @@ void RAGreedy::evictInterference(const LiveInterval &VirtReg,
     Intfs.append(IVR.begin(), IVR.end());
   }
 
+  SmallVector<const LiveInterval *, 8> TargetIntfs =
+      Matrix->getTargetInterferenceLiveI(VirtReg, PhysReg);
+  Intfs.append(TargetIntfs.begin(), TargetIntfs.end());
+
   // Evict them second. This will invalidate the queries.
   for (const LiveInterval *Intf : Intfs) {
     // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index d9ef911b9a32e..23234e35b08e1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2655,6 +2655,86 @@ void RISCVInstrInfo::genAlternativeCodeSequence(
   }
 }
 
+int RISCVInstrInfo::getOverlapConstraintsFromMI(uint16_t RVVPseudoOpcode) {
+  const RISCVVPseudosTable::PseudoInfo *RVV =
+      RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
+
+  if (!RVV)
+    return 0;
+
+  return RVV->TargetOverlapConstraintType;
+}
+
+static bool getConstraintsBetweenDstAndSrc(const MachineInstr *MBBI,
+                                           const MachineOperand &DestMO,
+                                           const MachineOperand &SrcMO,
+                                           int OverlapConstraintsType) {
+  MCRegister DestReg = DestMO.getReg().asMCReg();
+  MCRegister SrcReg = SrcMO.getReg().asMCReg();
+
+  const MachineRegisterInfo &MRI = MBBI->getMF()->getRegInfo();
+  unsigned DestRegIndex = RISCVRegisterInfo::getMCRegIndex(DestReg, &MRI);
+  unsigned DestRegLMUL = RISCVRegisterInfo::getMCRegLMUL(DestReg);
+  unsigned SrcRegIndex = RISCVRegisterInfo::getMCRegIndex(SrcReg, &MRI);
+  unsigned SrcRegLMUL = RISCVRegisterInfo::getMCRegLMUL(SrcReg);
+
+  if (OverlapConstraintsType == 2)
+    return RISCVRegisterInfo::isRVVConstraintsType2(DestRegIndex, DestRegLMUL,
+                                                    SrcRegIndex, SrcRegLMUL);
+  if (OverlapConstraintsType == 3)
+    return RISCVRegisterInfo::isRVVConstraintsType3(DestRegIndex, DestRegLMUL,
+                                                    SrcRegIndex, SrcRegLMUL);
+
+  return false;
+}
+
+// FIXME: Do we need to support registers like V26_V27?
+static bool isVectorPhyRegClass(MCRegister Reg) {
+  return (RISCVMCRegisterClasses[RISCV::VRRegClassID].contains(Reg) ||
+          RISCVMCRegisterClasses[RISCV::VRM2RegClassID].contains(Reg) ||
+          RISCVMCRegisterClasses[RISCV::VRM4RegClassID].contains(Reg) ||
+          RISCVMCRegisterClasses[RISCV::VRM8RegClassID].contains(Reg));
+}
+
+static bool getConstraintsWithDestAndAllSrc(const MachineInstr *MBBI,
+                                            int OverlapConstraintsType) {
+  bool NeedConstraint = false;
+  int DestOperand = 0; // Assume the early-clobber operand is operand 0.
+
+  if (!MBBI->getOperand(DestOperand).isReg() ||
+      !MBBI->getOperand(DestOperand).getReg().isPhysical())
+    return false;
+
+  for (auto &MO : MBBI->uses()) {
+    if (!MO.isReg())
+      continue;
+    if (MO.isTied())
+      continue;
+    if (MO.getReg().isPhysical() && !MO.isImplicit() &&
+        isVectorPhyRegClass(MO.getReg()))
+      NeedConstraint |= getConstraintsBetweenDstAndSrc(
+          MBBI, MBBI->getOperand(DestOperand), MO, OverlapConstraintsType);
+  }
+
+  return NeedConstraint;
+}
+
+static bool hasTargetInterference(const MachineInstr *MBBI) {
+  int OverlapConstraintsType =
+      RISCVInstrInfo::getOverlapConstraintsFromMI(MBBI->getOpcode());
+  if (OverlapConstraintsType == 2 || OverlapConstraintsType == 3)
+    return getConstraintsWithDestAndAllSrc(MBBI, OverlapConstraintsType);
+
+  return false;
+}
+
+static bool isEarlyClobberMI(const MachineInstr *MBBI) {
+  return llvm::any_of(MBBI->defs(), [](const MachineOperand &DefMO) {
+    return DefMO.isReg() && DefMO.isEarlyClobber();
+  });
+}
+
 bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
                                        StringRef &ErrInfo) const {
   MCInstrDesc const &Desc = MI.getDesc();
@@ -2922,6 +3002,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
     return false;
   }
 
+  if (hasTargetInterference(&MI) && isEarlyClobberMI(&MI)) {
+    ErrInfo = "Early-clobber def is redundant when the RVV overlap constraint "
+              "is modeled precisely";
+    return false;
+  }
+
   return true;
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 020be91e90e0b..5d1e9401f89af 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -286,6 +286,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
   finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern,
                     SmallVectorImpl<MachineInstr *> &InsInstrs) const override;
 
+  static int getOverlapConstraintsFromMI(uint16_t RVVPseudoOpcode);
+
   void genAlternativeCodeSequence(
       MachineInstr &Root, unsigned Pattern,
       SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -398,6 +400,7 @@ namespace RISCVVPseudosTable {
 struct PseudoInfo {
   uint16_t Pseudo;
   uint16_t BaseInstr;
+  uint16_t TargetOverlapConstraintType;
 };
 
 #define GET_RISCVVPseudosTable_DECL
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 9c03c7c83af04..2a0f13ba014ae 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -556,6 +556,7 @@ defset list<VTypeInfoToWide> AllWidenableBFloatToFloatVectors = {
 class RISCVVPseudo {
   Pseudo Pseudo = !cast<Pseudo>(NAME); // Used as a key.
   Instruction BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+  bits<2> TargetOverlapConstraintType = 1;
   // SEW = 0 is used to denote that the Pseudo is not SEW specific (or unknown).
   bits<8> SEW = 0;
   bit IncludeInInversePseudoTable = 1;
@@ -565,7 +566,7 @@ class RISCVVPseudo {
 def RISCVVPseudosTable : GenericTable {
   let FilterClass = "RISCVVPseudo";
   let CppTypeName = "PseudoInfo";
-  let Fields = [ "Pseudo", "BaseInstr" ];
+  let Fields = [ "Pseudo", "BaseInstr", "TargetOverlapConstraintType" ];
   let PrimaryKey = [ "Pseudo" ];
   let PrimaryKeyName = "getPseudoInfo";
   let PrimaryKeyEarlyOut = true;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 7fdbf4be1ed12..9beeab28ae56f 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -15,6 +15,8 @@
 #include "RISCVSubtarget.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -29,6 +31,11 @@
 
 using namespace llvm;
 
+static cl::opt<bool>
+    EnableRVVOverlapConstraints("riscv-rvv-overlap-constraints",
+                                cl::desc("Enable RVV overlap constraints."),
+                                cl::init(true), cl::Hidden);
+
 static cl::opt<bool> DisableCostPerUse("riscv-disable-cost-per-use",
                                        cl::init(false), cl::Hidden);
 static cl::opt<bool>
@@ -1040,3 +1047,221 @@ bool RISCVRegisterInfo::getRegAllocationHints(
 
   return BaseImplRetVal;
 }
+
+unsigned RISCVRegisterInfo::getMCRegIndex(MCRegister Reg,
+                                          const MachineRegisterInfo *MRI) {
+  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+  return TRI->getEncodingValue(Reg);
+}
+
+unsigned RISCVRegisterInfo::getMCRegLMUL(MCRegister Reg) {
+  if (RISCVMCRegisterClasses[RISCV::VRRegClassID].contains(Reg))
+    return 1;
+  if (RISCVMCRegisterClasses[RISCV::VRM2RegClassID].contains(Reg))
+    return 2;
+  if (RISCVMCRegisterClasses[RISCV::VRM4RegClassID].contains(Reg))
+    return 4;
+  if (RISCVMCRegisterClasses[RISCV::VRM8RegClassID].contains(Reg))
+    return 8;
+
+  llvm_unreachable("Unsupported register.");
+}
+
+// The destination EEW is smaller than the source EEW and the overlap is in the
+// lowest-numbered part of the source register group (e.g., when LMUL=1,
+// vnsrl.wi v0, v0, 3 is legal, but a destination of v1 is not).
+bool RISCVRegisterInfo::isRVVConstraintsType2(unsigned DestRegIndex,
+                                              unsigned DestRegLMUL,
+                                              unsigned SrcRegIndex,
+                                              unsigned SrcRegLMUL) {
+  if (DestRegIndex == SrcRegIndex)
+    return false;
+
+  return SrcRegIndex < DestRegIndex && DestRegIndex < SrcRegIndex + SrcRegLMUL;
+}
+
+// The destination EEW is greater than the source EEW, the source EMUL is at
+// least 1, and the overlap is in the highest-numbered part of the destination
+// register group (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of
+// v0, v2, or v4 is not).
+bool RISCVRegisterInfo::isRVVConstraintsType3(unsigned DestRegIndex,
+                                              unsigned DestRegLMUL,
+                                              unsigned SrcRegIndex,
+                                              unsigned SrcRegLMUL) {
+  if (DestRegLMUL == SrcRegLMUL && DestRegIndex == SrcRegIndex)
+    return true;
+
+  if (SrcRegIndex == DestRegIndex + DestRegLMUL - SrcRegLMUL)
+    return false;
+
+  return DestRegIndex <= SrcRegIndex &&
+         SrcRegIndex < DestRegIndex + DestRegLMUL - SrcRegLMUL;
+}
+
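+// Resolve the physical register an operand will occupy: the candidate
+// PhysRegbeAllocated if MO is the operand currently being allocated,
+// otherwise the physical register it already holds, either directly or
+// through the VirtRegMap.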
+static MCRegister getPhysicalReg(const MachineOperand &MO,
+                                 const MachineOperand &MObeAllocated,
+                                 MCRegister PhysRegbeAllocated,
+                                 const VirtRegMap *VRM) {
+  if (MObeAllocated.isIdenticalTo(MO))
+    return PhysRegbeAllocated;
+
+  if (MO.getReg().isPhysical())
+    return MO.getReg().asMCReg();
+
+  if (MO.getReg().isVirtual() && VRM->hasPhys(MO.getReg()))
+    return VRM->getPhys(MO.getReg());
+
+  return PhysRegbeAllocated;
+}
+
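+// Return true if assigning PhysRegbeAllocated to MObeAllocated would violate
+// the overlap constraint between DestMO and SrcMO of MI.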
+static bool checkConstraintsWithTwoOperand(
+    const MachineInstr *MI, const MachineOperand &MObeAllocated,
+    MCRegister PhysRegbeAllocated, const MachineOperand &DestMO,
+    const MachineOperand &SrcMO, const MachineRegisterInfo *MRI,
+    const VirtRegMap *VRM) {
+
+  assert(DestMO.isReg() && SrcMO.isReg() && "Must be Register");
+
+  // We must be allocating either DestMO or SrcMO.
+  if (!MObeAllocated.isIdenticalTo(DestMO) &&
+      !MObeAllocated.isIdenticalTo(SrcMO))
+    return false;
+
+  // Bail out if neither operand has been assigned a physical register yet.
+  if (DestMO.getReg().isVirtual() && !VRM->hasPhys(DestMO.getReg()) &&
+      SrcMO.getReg().isVirtual() && !VRM->hasPhys(SrcMO.getReg()))
+    return false;
+
+  MCRegister DestReg =
+      getPhysicalReg(DestMO, MObeAllocated, PhysRegbeAllocated, VRM);
+  MCRegister SrcReg =
+      getPhysicalReg(SrcMO, MObeAllocated, PhysRegbeAllocated, VRM);
+
+  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+
+  // Handle sub-register like sub_vrm4_0:vrm8
+  if (DestMO.getSubReg() != 0)
+    DestReg = TRI->getSubReg(DestReg, DestMO.getSubReg());
+
+  if (SrcMO.getSubReg() != 0)
+    SrcReg = TRI->getSubReg(SrcReg, SrcMO.getSubReg());
+
+  unsigned DestRegIndex = RISCVRegisterInfo::getMCRegIndex(DestReg, MRI);
+  unsigned DestRegLMUL = RISCVRegisterInfo::getMCRegLMUL(DestReg);
+  unsigned SrcRegIndex = RISCVRegisterInfo::getMCRegIndex(SrcReg, MRI);
+  unsigned SrcRegLMUL = RISCVRegisterInfo::getMCRegLMUL(SrcReg);
+
+  unsigned OverlapConstraints =
+      RISCVInstrInfo::getOverlapConstraintsFromMI(MI->getOpcode());
+  if (OverlapConstraints == 2)
+    return RISCVRegisterInfo::isRVVConstraintsType2(DestRegIndex, DestRegLMUL,
+                                                    SrcRegIndex, SrcRegLMUL);
+  if (OverlapConstraints == 3)
+    return RISCVRegisterInfo::isRVVConstraintsType3(DestRegIndex, DestRegLMUL,
+                                                    SrcRegIndex, SrcRegLMUL);
+  return false;
+}
+
+static bool isVectorVirtRegClass(Register R, const MachineRegisterInfo *MRI) {
+  // Registers like sub_vrm4_0:vrn2m4 also need to be checked.
+  const TargetRegisterClass *RC = MRI->getRegClass(R);
+  return RISCV::VRRegClass.hasSubClassEq(RC) ||
+         RISCV::VRM2RegClass.hasSubClassEq(RC) ||
+         RISCV::VRM4RegClass.hasSubClassEq(RC) ||
+         RISCV::VRM8RegClass.hasSubClassEq(RC) ||
+         RISCV::VRN2M1RegClass.hasSubClassEq(RC) ||
+         RISCV::VRN2M2RegClass.hasSubClassEq(RC) ||
+         RISCV::VRN2M4RegClass.hasSubClassEq(RC) ||
+         RISCV::VRN3M1RegClass.hasSubClassEq(RC) ||
+         RISCV::VRN3M2RegClass.hasSubClassEq(RC) ||
+         RISCV::VRN4M1RegClass.hasSubClassEq(RC) ||
+         RISCV::VRN4M2RegClass.hasSubClassEq(RC) ||
+         RISCV::VRN5M1RegClass.hasSubClassEq(RC) ||
+         RISCV::VRN6M1RegClass.hasSubClassEq(RC) ||
+         RISCV::VRN7M1RegClass.hasSubClassEq(RC) ||
+         RISCV::VRN8M1RegClass.hasSubClassEq(RC);
+}
+
+// FIXME: Do we need to support registers like V26_V27?
+static bool isVectorPhyRegClass(MCRegister Reg) {
+  return (RISCVMCRegisterClasses[RISCV::VRRegClassID].contains(Reg) ||
+          RISCVMCRegisterClasses[RISCV::VRM2RegClassID].contains(Reg) ||
+          RISCVMCRegisterClasses[RISCV::VRM4RegClassID].contains(Reg) ||
+          RISCVMCRegisterClasses[RISCV::VRM8RegClassID].contains(Reg));
+}
+
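+// Compute the set of physical registers that would end up violating the
+// overlap constraints of MI if PhysRegbeAllocated were assigned to
+// MObeAllocated.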
+static BitVector getConstraintsRegFromMI(const MachineInstr *MI,
+                                         const MachineOperand &MObeAllocated,
+                                         MCRegister PhysRegbeAllocated,
+                                         int RegNums,
+                                         const MachineRegisterInfo *MRI,
+                                         const VirtRegMap *VRM) {
+  BitVector IReg(RegNums);
+  // Assume the early-clobber operand is operand 0.
+  unsigned ECDestOperand = 0;
+  for (auto &MO : MI->uses()) {
+    if (!MO.isReg())
+      continue;
+    if (MO.isTied())
+      continue;
+         isVectorVirtRegClass(MO.getReg(), MRI)) ||
+        (MO.getReg().isPhysical() && !MO.isImplicit() &&
+         isVectorPhyRegClass(MO.getReg()))) {
+         isVectorPhyRegClass(MO.getReg(), MRI))) {
+      if (checkConstraintsWithTwoOperand(MI, MObeAllocated, PhysRegbeAllocated,
+                                         MI->getOperand(ECDestOperand), MO, MRI,
+                                         VRM)) {
+        if (VRM->hasPhys(MI->getOperand(ECDestOperand).getReg()))
+          IReg.set(VRM->getPhys(MI->getOperand(ECDestOperand).getReg()));
+        if (VRM->hasPhys(MO.getReg()))
+          IReg.set(VRM->getPhys(MO.getReg()));
+      }
+    }
+  }
+
+  return IReg;
+}
+
+BitVector RISCVRegisterInfo::getTargetInterferenceReg(
+    const LiveInterval &VirtReg, MCRegister PhysReg,
+    const MachineRegisterInfo *MRI, const VirtRegMap *VRM) const {
+  Register Reg = VirtReg.reg();
+  int NumRegs = getNumRegs();
+  BitVector IReg(NumRegs);
+
+  for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+    const MachineInstr *MI = MO.getParent();
+    if (MI->getOperand(0).isReg() && !MI->getOperand(0).isEarlyClobber())
+      continue;
+    if (!needConstraintsMI(MI))
+      continue;
+    IReg |= getConstraintsRegFromMI(MI, MO, PhysReg, NumRegs, MRI, VRM);
+  }
+
+  return IReg;
+}
+
+bool RISCVRegisterInfo::needConstraintsMI(const MachineInstr *MI) const {
+  unsigned OverlapConstraints =
+      RISCVInstrInfo::getOverlapConstraintsFromMI(MI->getOpcode());
+  return (OverlapConstraints == 2 || OverlapConstraints == 3);
+}
+
+bool RISCVRegisterInfo::enableTargetInterference() const {
+  return EnableRVVOverlapConstraints;
+}
+
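+// If a segment of NewLR (a copy of LR) starts at the early-clobber slot of an
+// instruction whose overlap constraints are modeled precisely (type 2/3),
+// move the segment start to the regular register slot so the artificial
+// early-clobber conflict disappears.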
+bool RISCVRegisterInfo::needUpdateECSlot(const LiveRange &LR, LiveRange &NewLR,
+                                         const LiveIntervals &LIS) const {
+  bool Changed = false;
+  for (LiveRange::Segment &Seg : NewLR.segments) {
+    SlotIndex Start = Seg.start;
+    const MachineInstr *CurrMI = LIS.getInstructionFromIndex(Start);
+    if (CurrMI == nullptr)
+      continue;
+    if (Start.isEarlyClobber() && needConstraintsMI(CurrMI)) {
+      Seg.start = Start.getRegSlot();
+      Changed = true;
+    }
+  }
+  return Changed;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index b368399e2ad14..1c69ba9ffae9f 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -156,6 +156,24 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
   static bool isRVVRegClass(const TargetRegisterClass *RC) {
     return RISCVRI::isVRegClass(RC->TSFlags);
   }
+
+  bool enableTargetInterference() const override;
+
+  BitVector getTargetInterferenceReg(const LiveInterval &VirtReg,
+                                     MCRegister PhysReg,
+                                     const MachineRegisterInfo *MRI,
+                                     const VirtRegMap *VRM) const override;
+
+  bool needConstraintsMI(const MachineInstr *MI) const;
+
+  bool needUpdateECSlot(const LiveRange &LR, LiveRange &NewLR,
+                        const LiveIntervals &LIS) const override;
+  static unsigned getMCRegIndex(MCRegister Reg, const MachineRegisterInfo *MRI);
+  static unsigned getMCRegLMUL(MCRegister Reg);
+  static bool isRVVConstraintsType2(unsigned DestRegIndex, unsigned DestRegLMUL,
+                                    unsigned SrcRegIndex, unsigned SrcRegLMUL);
+  static bool isRVVConstraintsType3(unsigned DestRegIndex, unsigned DestRegLMUL,
+                                    unsigned SrcRegIndex, unsigned SrcRegLMUL);
 };
 } // namespace llvm
 

From ded86fd7acb3f0e7c63200859d7c1e01b6a1bf87 Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen at sifive.com>
Date: Thu, 19 Jun 2025 21:52:53 -0700
Subject: [PATCH 2/2] Update testcases

---
 llvm/test/CodeGen/RISCV/memcmp-optsize.ll     |   48 +-
 llvm/test/CodeGen/RISCV/memcmp.ll             |   48 +-
 llvm/test/CodeGen/RISCV/pr94265.ll            |    8 +-
 ...regalloc-last-chance-recoloring-failure.ll |   16 +-
 llvm/test/CodeGen/RISCV/rvv/abd.ll            |   28 +-
 .../CodeGen/RISCV/rvv/active_lane_mask.ll     |   16 +-
 llvm/test/CodeGen/RISCV/rvv/binop-zext.ll     |   40 +-
 llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll        |  230 +-
 .../RISCV/rvv/combine-store-extract-crash.ll  |   20 +-
 llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll    |  150 +-
 llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll        |  144 +-
 llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll    |  210 +-
 llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll        |  144 +-
 .../CodeGen/RISCV/rvv/double-round-conv.ll    |  264 +-
 .../CodeGen/RISCV/rvv/extload-truncstore.ll   |  276 +-
 llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll  |   28 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-abs.ll    |    4 +-
 .../RISCV/rvv/fixed-vectors-binop-splats.ll   |    8 +-
 .../RISCV/rvv/fixed-vectors-ceil-vp.ll        |  111 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll   |  180 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll   |  212 +-
 .../rvv/fixed-vectors-extload-truncstore.ll   |  140 +-
 .../RISCV/rvv/fixed-vectors-extract-i1.ll     |   64 +-
 .../RISCV/rvv/fixed-vectors-extract.ll        |   24 +-
 .../RISCV/rvv/fixed-vectors-floor-vp.ll       |  111 +-
 .../RISCV/rvv/fixed-vectors-fmaximum-vp.ll    |  184 +-
 .../RISCV/rvv/fixed-vectors-fmaximum.ll       |   76 +-
 .../RISCV/rvv/fixed-vectors-fmaximumnum.ll    |   20 +-
 .../RISCV/rvv/fixed-vectors-fminimum-vp.ll    |  184 +-
 .../RISCV/rvv/fixed-vectors-fminimum.ll       |   76 +-
 .../RISCV/rvv/fixed-vectors-fminimumnum.ll    |   20 +-
 .../RISCV/rvv/fixed-vectors-fp-conv.ll        |   24 +-
 .../RISCV/rvv/fixed-vectors-fp-setcc.ll       |  408 +--
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     | 1280 +++----
 .../RISCV/rvv/fixed-vectors-fp2i-sat.ll       |   36 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll   |  174 +-
 .../RISCV/rvv/fixed-vectors-fpext-vp.ll       |    4 +-
 .../RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll |    5 +-
 .../RISCV/rvv/fixed-vectors-fptosi-vp.ll      |   69 +-
 .../RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll |    5 +-
 .../RISCV/rvv/fixed-vectors-fptoui-vp.ll      |   69 +-
 .../RISCV/rvv/fixed-vectors-fptrunc-vp.ll     |   50 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll   |   80 +-
 .../RISCV/rvv/fixed-vectors-int-exttrunc.ll   |    8 +-
 .../RISCV/rvv/fixed-vectors-int-setcc.ll      |   96 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-int.ll    |   16 +-
 .../rvv/fixed-vectors-inttoptr-ptrtoint.ll    |    7 +-
 .../RISCV/rvv/fixed-vectors-llrint-vp.ll      |   32 +-
 .../RISCV/rvv/fixed-vectors-lrint-vp.ll       |   40 +-
 .../RISCV/rvv/fixed-vectors-mask-splat.ll     |    8 +-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll  |   50 +-
 .../RISCV/rvv/fixed-vectors-masked-scatter.ll |    8 +-
 .../RISCV/rvv/fixed-vectors-nearbyint-vp.ll   |  111 +-
 .../RISCV/rvv/fixed-vectors-rint-vp.ll        |  111 +-
 .../RISCV/rvv/fixed-vectors-round-vp.ll       |  111 +-
 .../RISCV/rvv/fixed-vectors-roundeven-vp.ll   |  111 +-
 .../RISCV/rvv/fixed-vectors-roundtozero-vp.ll |  111 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-sad.ll    |   26 +-
 .../RISCV/rvv/fixed-vectors-setcc-fp-vp.ll    |  410 +--
 .../RISCV/rvv/fixed-vectors-setcc-int-vp.ll   |  417 +--
 .../RISCV/rvv/fixed-vectors-sext-vp.ll        |    8 +-
 .../fixed-vectors-shuffle-changes-length.ll   |    7 +-
 .../fixed-vectors-shuffle-deinterleave2.ll    |   16 +-
 .../rvv/fixed-vectors-shuffle-exact-vlen.ll   |    8 +-
 .../fixed-vectors-shuffle-fp-interleave.ll    |   39 +-
 .../RISCV/rvv/fixed-vectors-shuffle-fp.ll     |    4 +-
 .../fixed-vectors-shuffle-int-interleave.ll   |   51 +-
 .../RISCV/rvv/fixed-vectors-shuffle-int.ll    |    8 +-
 .../RISCV/rvv/fixed-vectors-shuffle-rotate.ll |   12 +-
 .../RISCV/rvv/fixed-vectors-sitofp-vp.ll      |   72 +-
 .../RISCV/rvv/fixed-vectors-trunc-sat-clip.ll |   72 +-
 .../RISCV/rvv/fixed-vectors-trunc-vp.ll       |  318 +-
 .../RISCV/rvv/fixed-vectors-uitofp-vp.ll      |   72 +-
 .../RISCV/rvv/fixed-vectors-vfadd-vp.ll       |   84 +-
 .../RISCV/rvv/fixed-vectors-vfclass-vp.ll     |   35 +-
 .../fixed-vectors-vfcmp-constrained-sdnode.ll | 2008 +++++------
 ...fixed-vectors-vfcmps-constrained-sdnode.ll |  828 ++---
 .../RISCV/rvv/fixed-vectors-vfdiv-vp.ll       |   84 +-
 .../RISCV/rvv/fixed-vectors-vfma-vp.ll        |   96 +-
 .../RISCV/rvv/fixed-vectors-vfmax-vp.ll       |   32 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll  |   64 +-
 .../RISCV/rvv/fixed-vectors-vfmin-vp.ll       |   32 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll  |   64 +-
 .../RISCV/rvv/fixed-vectors-vfmul-vp.ll       |   84 +-
 ...fixed-vectors-vfpext-constrained-sdnode.ll |   32 +-
 ...fixed-vectors-vfptoi-constrained-sdnode.ll |  362 +-
 ...xed-vectors-vfptrunc-constrained-sdnode.ll |   51 +-
 .../RISCV/rvv/fixed-vectors-vfsqrt-vp.ll      |   32 +-
 .../RISCV/rvv/fixed-vectors-vfsub-vp.ll       |   84 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll |   60 +-
 .../RISCV/rvv/fixed-vectors-vfwmacc.ll        |   40 +-
 .../RISCV/rvv/fixed-vectors-vfwmaccbf16.ll    |   12 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll |   60 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll |   60 +-
 ...fixed-vectors-vitofp-constrained-sdnode.ll |  230 +-
 .../RISCV/rvv/fixed-vectors-vpgather.ll       |  372 +-
 .../RISCV/rvv/fixed-vectors-vpmerge.ll        |   16 +-
 .../RISCV/rvv/fixed-vectors-vpscatter.ll      |    8 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vrol.ll   |    8 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vror.ll   |   24 +-
 .../RISCV/rvv/fixed-vectors-vwadd-mask.ll     |   13 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll  |   96 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll |   96 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll  |   96 +-
 .../RISCV/rvv/fixed-vectors-vwmulsu.ll        |   96 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll |   96 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll  |  220 +-
 .../RISCV/rvv/fixed-vectors-vwsub-mask.ll     |   13 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll  |   96 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll |   96 +-
 .../RISCV/rvv/fixed-vectors-zext-vp.ll        |    8 +-
 .../RISCV/rvv/fixed-vectors-zvqdotq.ll        |  136 +-
 .../CodeGen/RISCV/rvv/float-round-conv.ll     |  188 +-
 llvm/test/CodeGen/RISCV/rvv/floor-vp.ll       |  230 +-
 .../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll |  369 +-
 llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll    |  580 ++--
 .../CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll   |  104 +-
 .../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll |  369 +-
 llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll    |  580 ++--
 .../CodeGen/RISCV/rvv/fminimumnum-sdnode.ll   |  104 +-
 .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll     |  160 +-
 llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll     |   36 +-
 llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll     |   36 +-
 .../test/CodeGen/RISCV/rvv/half-round-conv.ll |   84 +-
 .../CodeGen/RISCV/rvv/interleave-crash.ll     |   32 +-
 .../RISCV/rvv/intrinsic-vector-match.ll       |  252 +-
 .../RISCV/rvv/legalize-scalable-vectortype.ll |    4 +-
 llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll  |   12 +-
 llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll      |   12 +-
 llvm/test/CodeGen/RISCV/rvv/load-bf16.ll      |    4 +-
 llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll   |   36 +-
 llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll       |   36 +-
 llvm/test/CodeGen/RISCV/rvv/masked-tama.ll    |    9 +-
 llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll |   56 +-
 .../CodeGen/RISCV/rvv/mscatter-combine.ll     |    4 +-
 .../test/CodeGen/RISCV/rvv/mscatter-sdnode.ll |   40 +-
 .../CodeGen/RISCV/rvv/narrow-shift-extend.ll  |   40 +-
 llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll   |  230 +-
 llvm/test/CodeGen/RISCV/rvv/pr61561.ll        |    8 +-
 llvm/test/CodeGen/RISCV/rvv/pr95865.ll        |    4 +-
 llvm/test/CodeGen/RISCV/rvv/rint-vp.ll        |  230 +-
 llvm/test/CodeGen/RISCV/rvv/round-vp.ll       |  230 +-
 llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll   |  230 +-
 llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll |  230 +-
 llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll    |  606 ++--
 llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll       |  476 +--
 llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll   |  414 +--
 llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll  |    4 +-
 .../CodeGen/RISCV/rvv/sink-splat-operands.ll  |    8 +-
 llvm/test/CodeGen/RISCV/rvv/store-bf16.ll     |    4 +-
 .../RISCV/rvv/trunc-sat-clip-sdnode.ll        |  116 +-
 .../RISCV/rvv/trunc-select-to-max-usat.ll     |   32 +-
 llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll  |    8 +-
 .../RISCV/rvv/vector-deinterleave-fixed.ll    |    6 +-
 .../RISCV/rvv/vector-deinterleave-load.ll     |    4 +-
 .../CodeGen/RISCV/rvv/vector-deinterleave.ll  |  130 +-
 .../RISCV/rvv/vector-interleave-fixed.ll      |   44 +-
 .../RISCV/rvv/vector-interleave-store.ll      |   12 +-
 .../CodeGen/RISCV/rvv/vector-interleave.ll    |  392 ++-
 llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll   |  112 +-
 .../RISCV/rvv/vfadd-constrained-sdnode.ll     |  120 +-
 llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll   |  104 +-
 llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll       |  256 +-
 .../RISCV/rvv/vfcmp-constrained-sdnode.ll     | 3018 +++++++----------
 .../RISCV/rvv/vfcmps-constrained-sdnode.ll    | 1242 +++----
 .../CodeGen/RISCV/rvv/vfcopysign-sdnode.ll    |   92 +-
 .../RISCV/rvv/vfdiv-constrained-sdnode.ll     |  258 +-
 llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll   |  162 +-
 llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll       |  240 +-
 .../RISCV/rvv/vfirst-byte-compare-index.ll    |    4 +-
 llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll        | 2596 ++++++--------
 .../RISCV/rvv/vfmadd-constrained-sdnode.ll    |  190 +-
 llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll  |  284 +-
 llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll   |   96 +-
 llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll       |   80 +-
 llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll   |   96 +-
 llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll       |   80 +-
 .../RISCV/rvv/vfmsub-constrained-sdnode.ll    |  110 +-
 .../RISCV/rvv/vfmul-constrained-sdnode.ll     |  120 +-
 llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll   |  104 +-
 llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll       |  120 +-
 llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f.ll     |   27 +-
 llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x.ll     |   27 +-
 llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu.ll    |   27 +-
 llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f.ll |   27 +-
 llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f.ll |   45 +-
 .../test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f.ll |   45 +-
 llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll     |   45 +-
 llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll    |   45 +-
 llvm/test/CodeGen/RISCV/rvv/vfncvtbf16-f-f.ll |   15 +-
 .../RISCV/rvv/vfnmadd-constrained-sdnode.ll   |  152 +-
 .../RISCV/rvv/vfnmsub-constrained-sdnode.ll   |  114 +-
 .../RISCV/rvv/vfpext-constrained-sdnode.ll    |   52 +-
 llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll  |   60 +-
 llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll      |   28 +-
 .../RISCV/rvv/vfptoi-constrained-sdnode.ll    |  396 +--
 llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll  |  484 ++-
 .../test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll |    5 +-
 llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll     |   73 +-
 .../test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll |    5 +-
 llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll     |   73 +-
 .../RISCV/rvv/vfptrunc-constrained-sdnode.ll  |   68 +-
 .../test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll |   62 +-
 llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll    |   37 +-
 .../RISCV/rvv/vfsqrt-constrained-sdnode.ll    |   48 +-
 llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll  |   48 +-
 llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll      |   80 +-
 .../RISCV/rvv/vfsub-constrained-sdnode.ll     |  136 +-
 llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll   |  104 +-
 llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll       |  240 +-
 llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll  |   39 +-
 llvm/test/CodeGen/RISCV/rvv/vfwadd.ll         |   54 +-
 llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll       |   20 +-
 llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll     |   24 +-
 llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll     |   36 +-
 llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll    |   36 +-
 llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll |   24 +-
 .../test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll |   24 +-
 llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll     |   24 +-
 llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll    |   24 +-
 llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll |   12 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmacc-sdnode.ll |   48 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll     |   78 +-
 .../CodeGen/RISCV/rvv/vfwmaccbf16-sdnode.ll   |   12 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll     |   60 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll  |   39 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmul.ll         |   54 +-
 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll    |  102 +-
 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll    |   86 +-
 llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll  |   39 +-
 llvm/test/CodeGen/RISCV/rvv/vfwsub.ll         |   54 +-
 llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll       |   20 +-
 .../RISCV/rvv/vitofp-constrained-sdnode.ll    |  302 +-
 llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll  |  654 ++--
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll  |  408 +--
 llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll    |   72 +-
 llvm/test/CodeGen/RISCV/rvv/vloxei.ll         |  190 +-
 llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll    |   72 +-
 llvm/test/CodeGen/RISCV/rvv/vluxei.ll         |  190 +-
 llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll |  218 +-
 .../test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll |  152 +-
 llvm/test/CodeGen/RISCV/rvv/vmsge.ll          |   76 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll         |   76 +-
 llvm/test/CodeGen/RISCV/rvv/vnclip.ll         |   81 +-
 llvm/test/CodeGen/RISCV/rvv/vnclipu.ll        |   81 +-
 llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll   |   99 +-
 llvm/test/CodeGen/RISCV/rvv/vnsra.ll          |   81 +-
 llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll   |   99 +-
 llvm/test/CodeGen/RISCV/rvv/vnsrl.ll          |   81 +-
 llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll   |   16 +-
 .../CodeGen/RISCV/rvv/vp-inttoptr-ptrtoint.ll |   19 +-
 .../test/CodeGen/RISCV/rvv/vp-reverse-mask.ll |    9 +-
 llvm/test/CodeGen/RISCV/rvv/vp-splat-mask.ll  |   15 +-
 .../RISCV/rvv/vp-splice-mask-vectors.ll       |    9 +-
 .../RISCV/rvv/vp-vector-interleaved-access.ll |   36 +-
 .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll |  476 ++-
 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll |    8 +-
 .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll     |   52 +-
 llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll    |   12 +-
 llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll    |   12 +-
 .../RISCV/rvv/vscale-vw-web-simplification.ll |   48 +-
 llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll       |    8 +-
 llvm/test/CodeGen/RISCV/rvv/vsext.ll          |   56 +-
 .../test/CodeGen/RISCV/rvv/vsitofp-vp-mask.ll |    8 +-
 llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll     |  100 +-
 llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll |    5 +-
 llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll      |   34 +-
 llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll |   62 +-
 .../test/CodeGen/RISCV/rvv/vuitofp-vp-mask.ll |    8 +-
 llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll     |  100 +-
 .../CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll    |   13 +-
 llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll   |  170 +-
 llvm/test/CodeGen/RISCV/rvv/vwadd.ll          |   81 +-
 llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll        |   32 +-
 llvm/test/CodeGen/RISCV/rvv/vwaddu.ll         |   81 +-
 llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll       |   32 +-
 llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll   |  213 +-
 llvm/test/CodeGen/RISCV/rvv/vwmul.ll          |   81 +-
 llvm/test/CodeGen/RISCV/rvv/vwmulsu.ll        |   81 +-
 llvm/test/CodeGen/RISCV/rvv/vwmulu.ll         |   81 +-
 llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll   |  202 +-
 llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll       |  186 +-
 llvm/test/CodeGen/RISCV/rvv/vwsll.ll          |  117 +-
 .../CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll    |   13 +-
 llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll   |  162 +-
 llvm/test/CodeGen/RISCV/rvv/vwsub.ll          |   81 +-
 llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll        |   32 +-
 llvm/test/CodeGen/RISCV/rvv/vwsubu.ll         |   81 +-
 llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll       |   32 +-
 llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll       |    8 +-
 llvm/test/CodeGen/RISCV/rvv/vzext.ll          |   56 +-
 .../CodeGen/RISCV/rvv/zvbb-demanded-bits.ll   |    4 +-
 llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll |  152 +-
 293 files changed, 17869 insertions(+), 21475 deletions(-)
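
The dominant pattern across these test updates is visible in the first hunks
below: with the constraints modeled precisely, a mask-producing compare may now
write its result into a register that overlaps its source group, e.g.

  vmsne.vv v12, v8, v10    becomes    vmsne.vv v8, v8, v10
  vcpop.m  a0, v12                    vcpop.m  a0, v8

so the extra destination register (v12 here; v16/v24 at larger LMUL) is freed.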

diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
index 0d57e4201512e..992dda4e796c7 100644
--- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
@@ -1714,8 +1714,8 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 31, e8, m2, ta, ma
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v10, (a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v12, v8, v10
-; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v12
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v10
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
@@ -1724,8 +1724,8 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vsetivli zero, 31, e8, m2, ta, ma
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v10, (a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v12, v8, v10
-; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v12
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v10
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -1910,8 +1910,8 @@ define i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v10, (a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v12, v8, v10
-; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v12
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v10
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
@@ -1921,8 +1921,8 @@ define i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v10, (a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v12, v8, v10
-; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v12
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v10
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -2077,8 +2077,8 @@ define i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v12, (a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v16, v8, v12
-; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v16
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v12
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
@@ -2088,8 +2088,8 @@ define i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v12, (a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v16, v8, v12
-; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v16
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v12
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -2244,8 +2244,8 @@ define i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v12, (a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v16, v8, v12
-; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v16
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v12
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
@@ -2255,8 +2255,8 @@ define i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v12, (a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v16, v8, v12
-; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v16
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v12
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -2411,8 +2411,8 @@ define i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v16, (a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v24, v8, v16
-; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v24
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v16
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
@@ -2422,8 +2422,8 @@ define i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v16, (a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v24, v8, v16
-; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v24
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v16
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -2578,8 +2578,8 @@ define i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v16, (a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v24, v8, v16
-; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v24
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v16
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
@@ -2589,8 +2589,8 @@ define i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v16, (a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v24, v8, v16
-; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v24
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v16
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll
index 0caab1f5ce2f0..674cce7284ea7 100644
--- a/llvm/test/CodeGen/RISCV/memcmp.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp.ll
@@ -1792,8 +1792,8 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind {
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 31, e8, m2, ta, ma
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v10, (a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v12, v8, v10
-; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v12
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v10
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
@@ -1802,8 +1802,8 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind {
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vsetivli zero, 31, e8, m2, ta, ma
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v10, (a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v12, v8, v10
-; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v12
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v10
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -2066,8 +2066,8 @@ define i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind {
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v10, (a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v12, v8, v10
-; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v12
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v10
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
@@ -2077,8 +2077,8 @@ define i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind {
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v10, (a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v12, v8, v10
-; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v12
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v10
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -2311,8 +2311,8 @@ define i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind {
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v12, (a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v16, v8, v12
-; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v16
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v12
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
@@ -2322,8 +2322,8 @@ define i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind {
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v12, (a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v16, v8, v12
-; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v16
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v12
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -2556,8 +2556,8 @@ define i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind {
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v12, (a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v16, v8, v12
-; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v16
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v12
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
@@ -2567,8 +2567,8 @@ define i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind {
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v12, (a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v16, v8, v12
-; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v16
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v12
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -2723,8 +2723,8 @@ define i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind {
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v16, (a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v24, v8, v16
-; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v24
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v16
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
@@ -2734,8 +2734,8 @@ define i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind {
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v16, (a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v24, v8, v16
-; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v24
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v16
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -2890,8 +2890,8 @@ define i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind {
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v16, (a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v24, v8, v16
-; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v24
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v16
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
@@ -2901,8 +2901,8 @@ define i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind {
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
 ; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v16, (a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v24, v8, v16
-; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v24
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v16
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/pr94265.ll b/llvm/test/CodeGen/RISCV/pr94265.ll
index f92cdb4ca7395..c4cb28f6f930a 100644
--- a/llvm/test/CodeGen/RISCV/pr94265.ll
+++ b/llvm/test/CodeGen/RISCV/pr94265.ll
@@ -12,8 +12,8 @@ define <8 x i16> @PR94265(<8 x i32> %a0) #0 {
 ; RV32I-NEXT:    vsrl.vi v10, v10, 26
 ; RV32I-NEXT:    vadd.vv v8, v8, v10
 ; RV32I-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32I-NEXT:    vnsrl.wi v10, v8, 6
-; RV32I-NEXT:    vsll.vi v8, v10, 10
+; RV32I-NEXT:    vnsrl.wi v8, v8, 6
+; RV32I-NEXT:    vsll.vi v8, v8, 10
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: PR94265:
@@ -23,8 +23,8 @@ define <8 x i16> @PR94265(<8 x i32> %a0) #0 {
 ; RV64I-NEXT:    vsrl.vi v10, v10, 26
 ; RV64I-NEXT:    vadd.vv v8, v8, v10
 ; RV64I-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64I-NEXT:    vnsrl.wi v10, v8, 6
-; RV64I-NEXT:    vsll.vi v8, v10, 10
+; RV64I-NEXT:    vnsrl.wi v8, v8, 6
+; RV64I-NEXT:    vsll.vi v8, v8, 10
 ; RV64I-NEXT:    ret
   %t1 = sdiv <8 x i32> %a0, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
   %t2 = trunc <8 x i32> %t1 to <8 x i16>
diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
index 878b180e34c01..b93bd6f65fdd8 100644
--- a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
+++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
@@ -49,13 +49,13 @@ define void @last_chance_recoloring_failure() {
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
 ; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
+; CHECK-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, s0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwsub.wv v8, v24, v16
+; CHECK-NEXT:    vfwsub.wv v8, v24, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, tu, mu
 ; CHECK-NEXT:    vfdiv.vv v8, v24, v8, v0.t
 ; CHECK-NEXT:    vse32.v v8, (a0)
@@ -109,15 +109,15 @@ define void @last_chance_recoloring_failure() {
 ; SUBREGLIVENESS-NEXT:    addi a0, a0, 16
 ; SUBREGLIVENESS-NEXT:    csrr a1, vlenb
 ; SUBREGLIVENESS-NEXT:    slli a1, a1, 2
-; SUBREGLIVENESS-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
+; SUBREGLIVENESS-NEXT:    vl4r.v v12, (a0) # vscale x 32-byte Folded Reload
 ; SUBREGLIVENESS-NEXT:    add a0, a0, a1
-; SUBREGLIVENESS-NEXT:    vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
+; SUBREGLIVENESS-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
 ; SUBREGLIVENESS-NEXT:    addi a0, sp, 16
-; SUBREGLIVENESS-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; SUBREGLIVENESS-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; SUBREGLIVENESS-NEXT:    vsetvli zero, s0, e16, m4, ta, ma
-; SUBREGLIVENESS-NEXT:    vfwsub.wv v8, v24, v16
+; SUBREGLIVENESS-NEXT:    vfwsub.wv v8, v16, v12
 ; SUBREGLIVENESS-NEXT:    vsetvli zero, zero, e32, m8, tu, mu
-; SUBREGLIVENESS-NEXT:    vfdiv.vv v8, v24, v8, v0.t
+; SUBREGLIVENESS-NEXT:    vfdiv.vv v8, v16, v8, v0.t
 ; SUBREGLIVENESS-NEXT:    vse32.v v8, (a0)
 ; SUBREGLIVENESS-NEXT:    csrr a0, vlenb
 ; SUBREGLIVENESS-NEXT:    slli a0, a0, 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/abd.ll b/llvm/test/CodeGen/RISCV/rvv/abd.ll
index 583d872238df7..3e82a821fb45b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/abd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/abd.ll
@@ -59,9 +59,9 @@ define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmin.vv v10, v8, v9
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v10
+; CHECK-NEXT:    vsub.vv v9, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
   %b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -92,9 +92,9 @@ define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmin.vv v10, v8, v9
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v10
+; CHECK-NEXT:    vsub.vv v9, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %a.sext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
   %b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -125,9 +125,9 @@ define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vmin.vv v10, v8, v9
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v10
+; CHECK-NEXT:    vsub.vv v9, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %a.sext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
   %b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -193,9 +193,9 @@ define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vminu.vv v10, v8, v9
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v10
+; CHECK-NEXT:    vsub.vv v9, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
   %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -226,9 +226,9 @@ define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vminu.vv v10, v8, v9
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v10
+; CHECK-NEXT:    vsub.vv v9, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
   %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -259,9 +259,9 @@ define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vminu.vv v10, v8, v9
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v10
+; CHECK-NEXT:    vsub.vv v9, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
   %b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -298,9 +298,9 @@ define <vscale x 4 x i32> @uabd_non_matching_promoted_ops(<vscale x 4 x i8> %a,
 ; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    vminu.vv v8, v10, v9
 ; CHECK-NEXT:    vmaxu.vv v9, v10, v9
-; CHECK-NEXT:    vsub.vv v10, v9, v8
+; CHECK-NEXT:    vsub.vv v9, v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
   %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
index ec422a8fbb928..ae530e4ec933e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
@@ -112,9 +112,9 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT:    vmsltu.vx v0, v8, a2
 ; CHECK-NEXT:    vsext.vf8 v8, v16
 ; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v16, v8, a2
+; CHECK-NEXT:    vmsltu.vx v8, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vslideup.vi v0, v16, 2
+; CHECK-NEXT:    vslideup.vi v0, v8, 2
 ; CHECK-NEXT:    ret
   %mask = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 %index, i64 %tc)
   ret <32 x i1> %mask
@@ -141,17 +141,17 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT:    vmsltu.vx v16, v8, a2
 ; CHECK-NEXT:    vsext.vf8 v8, v17
 ; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v17, v8, a2
+; CHECK-NEXT:    vmsltu.vx v8, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v0, v16, 2
 ; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v0, v17, 4
+; CHECK-NEXT:    vslideup.vi v0, v8, 4
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vsext.vf8 v8, v18
 ; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v16, v8, a2
+; CHECK-NEXT:    vmsltu.vx v8, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v0, v16, 6
+; CHECK-NEXT:    vslideup.vi v0, v8, 6
 ; CHECK-NEXT:    ret
   %mask = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64 %index, i64 %tc)
   ret <64 x i1> %mask
@@ -205,7 +205,7 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT:    vmsltu.vx v21, v8, a2
 ; CHECK-NEXT:    vsext.vf8 v8, v22
 ; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v22, v8, a2
+; CHECK-NEXT:    vmsltu.vx v8, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v17, v16, 2
 ; CHECK-NEXT:    vslideup.vi v0, v20, 2
@@ -214,7 +214,7 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT:    vslideup.vi v0, v21, 4
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vslideup.vi v17, v19, 6
-; CHECK-NEXT:    vslideup.vi v0, v22, 6
+; CHECK-NEXT:    vslideup.vi v0, v8, 6
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vslideup.vi v0, v17, 8
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll b/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll
index 2d5258fbabfac..f13be9134f838 100644
--- a/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll
@@ -9,9 +9,9 @@ define <vscale x 8 x i32> @add(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-LABEL: add:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vwaddu.vv v12, v8, v9
+; CHECK-NEXT:    vwaddu.vv v10, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
   %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -23,9 +23,9 @@ define <vscale x 8 x i32> @sub(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-LABEL: sub:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vwsubu.vv v12, v8, v9
+; CHECK-NEXT:    vwsubu.vv v10, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
   %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -37,9 +37,9 @@ define <vscale x 8 x i32> @mul(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-LABEL: mul:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vwmulu.vv v12, v8, v9
+; CHECK-NEXT:    vwmulu.vv v10, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
   %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -53,9 +53,9 @@ define <vscale x 8 x i32> @sdiv(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v9
 ; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vdivu.vv v12, v12, v10
+; CHECK-NEXT:    vdivu.vv v10, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
   %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -69,9 +69,9 @@ define <vscale x 8 x i32> @udiv(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v9
 ; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vdivu.vv v12, v12, v10
+; CHECK-NEXT:    vdivu.vv v10, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
   %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -85,9 +85,9 @@ define <vscale x 8 x i32> @srem(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v9
 ; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vremu.vv v12, v12, v10
+; CHECK-NEXT:    vremu.vv v10, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
   %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -101,9 +101,9 @@ define <vscale x 8 x i32> @urem(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v9
 ; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vremu.vv v12, v12, v10
+; CHECK-NEXT:    vremu.vv v10, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
   %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -115,9 +115,9 @@ define <vscale x 8 x i32> @and(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-LABEL: and:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vand.vv v12, v8, v9
+; CHECK-NEXT:    vand.vv v11, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
   %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -129,9 +129,9 @@ define <vscale x 8 x i32> @or(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-LABEL: or:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vor.vv v12, v8, v9
+; CHECK-NEXT:    vor.vv v11, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
   %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -143,9 +143,9 @@ define <vscale x 8 x i32> @xor(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-LABEL: xor:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vxor.vv v12, v8, v9
+; CHECK-NEXT:    vxor.vv v11, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
   %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
index 2fe8c8ce7975a..430cef11af48b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
@@ -349,6 +349,12 @@ define <vscale x 32 x bfloat> @vp_ceil_vv_nxv32bf16(<vscale x 32 x bfloat> %va,
 define <vscale x 32 x bfloat> @vp_ceil_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv32bf16_unmasked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmset.m v16
@@ -358,35 +364,39 @@ define <vscale x 32 x bfloat> @vp_ceil_vv_nxv32bf16_unmasked(<vscale x 32 x bflo
 ; CHECK-NEXT:    fmv.w.x fa5, a3
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v7, v16, a2
+; CHECK-NEXT:    vslidedown.vx v16, v16, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
-; CHECK-NEXT:    vmv1r.v v6, v7
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v24, v0.t
+; CHECK-NEXT:    vfabs.v v8, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a2, 3
-; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
 ; CHECK-NEXT:    bltu a0, a1, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB11_2:
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16
 ; CHECK-NEXT:    vmflt.vf v0, v24, fa5
@@ -398,6 +408,12 @@ define <vscale x 32 x bfloat> @vp_ceil_vv_nxv32bf16_unmasked(<vscale x 32 x bflo
 ; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    .cfi_def_cfa sp, 16
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x bfloat> %v
@@ -665,21 +681,19 @@ declare <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half>, <vscale x
 define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_ceil_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a0)
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 3
-; ZVFH-NEXT:    vmv1r.v v0, v10
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_ceil_vv_nxv8f16:
@@ -753,21 +767,19 @@ declare <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half>, <vscal
 define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_ceil_vv_nxv16f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a0)
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 3
-; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_ceil_vv_nxv16f16:
@@ -841,21 +853,19 @@ declare <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half>, <vscal
 define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_ceil_vv_nxv32f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 3
-; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_ceil_vv_nxv32f16:
@@ -937,6 +947,12 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16_unmasked(<vscale x 32 x half> %
 ;
 ; ZVFHMIN-LABEL: vp_ceil_vv_nxv32f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a1
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v16
@@ -946,35 +962,39 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a3
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v7, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
-; ZVFHMIN-NEXT:    vmv1r.v v6, v7
+; ZVFHMIN-NEXT:    vmv1r.v v17, v16
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-NEXT:    addi a3, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v6, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a2, 3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    fsrm a2
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB23_2:
+; ZVFHMIN-NEXT:    addi a1, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v24, v16
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
@@ -986,6 +1006,12 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x half> %v
@@ -1081,20 +1107,18 @@ define <vscale x 4 x float> @vp_ceil_vv_nxv4f32(<vscale x 4 x float> %va, <vscal
 ; CHECK-LABEL: vp_ceil_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1125,20 +1149,18 @@ define <vscale x 8 x float> @vp_ceil_vv_nxv8f32(<vscale x 8 x float> %va, <vscal
 ; CHECK-LABEL: vp_ceil_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1169,20 +1191,18 @@ define <vscale x 16 x float> @vp_ceil_vv_nxv16f32(<vscale x 16 x float> %va, <vs
 ; CHECK-LABEL: vp_ceil_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -1254,21 +1274,19 @@ declare <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double>, <vsca
 define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv2f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI36_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -1298,21 +1316,19 @@ declare <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double>, <vsca
 define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -1342,21 +1358,19 @@ declare <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double>, <vsca
 define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv7f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI40_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -1386,21 +1400,19 @@ declare <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double>, <vsca
 define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI42_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1438,17 +1450,15 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <
 ; CHECK-NEXT:    srli a3, a1, 3
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI44_0)(a2)
 ; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    vslidedown.vx v6, v0, a3
+; CHECK-NEXT:    vslidedown.vx v0, v0, a3
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a2, 3
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
 ; CHECK-NEXT:    fsrm a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll
index 496755738e6fa..1fb2880ad9f36 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll
@@ -9,20 +9,20 @@ define void @test(ptr %ref_array, ptr %sad_array) {
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    th.lwd a2, a3, (a0), 0, 3
 ; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; RV32-NEXT:    vle8.v v12, (a2)
+; RV32-NEXT:    vle8.v v11, (a2)
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vzext.vf4 v8, v12
+; RV32-NEXT:    vzext.vf4 v8, v11
 ; RV32-NEXT:    vmv.s.x v12, zero
 ; RV32-NEXT:    vredsum.vs v8, v8, v12
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    th.swia a0, (a1), 4, 0
 ; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; RV32-NEXT:    vle8.v v13, (a3)
+; RV32-NEXT:    vle8.v v11, (a3)
 ; RV32-NEXT:    vmv.v.i v8, 0
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslideup.vi v13, v8, 4
+; RV32-NEXT:    vslideup.vi v11, v8, 4
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vzext.vf4 v8, v13
+; RV32-NEXT:    vzext.vf4 v8, v11
 ; RV32-NEXT:    vredsum.vs v8, v8, v12
 ; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a1)
@@ -32,20 +32,20 @@ define void @test(ptr %ref_array, ptr %sad_array) {
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    th.ldd a2, a3, (a0), 0, 4
 ; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT:    vle8.v v12, (a2)
+; RV64-NEXT:    vle8.v v11, (a2)
 ; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT:    vzext.vf4 v8, v12
+; RV64-NEXT:    vzext.vf4 v8, v11
 ; RV64-NEXT:    vmv.s.x v12, zero
 ; RV64-NEXT:    vredsum.vs v8, v8, v12
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    th.swia a0, (a1), 4, 0
 ; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT:    vle8.v v13, (a3)
+; RV64-NEXT:    vle8.v v11, (a3)
 ; RV64-NEXT:    vmv.v.i v8, 0
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslideup.vi v13, v8, 4
+; RV64-NEXT:    vslideup.vi v11, v8, 4
 ; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT:    vzext.vf4 v8, v13
+; RV64-NEXT:    vzext.vf4 v8, v11
 ; RV64-NEXT:    vredsum.vs v8, v8, v12
 ; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index 319d82f724ca7..0f990a4fff6ab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -162,12 +162,12 @@ define <vscale x 4 x i8> @ctlz_nxv4i8(<vscale x 4 x i8> %va) {
 ; CHECK-F-LABEL: ctlz_nxv4i8:
 ; CHECK-F:       # %bb.0:
 ; CHECK-F-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v10, v8
+; CHECK-F-NEXT:    vzext.vf2 v9, v8
 ; CHECK-F-NEXT:    li a0, 134
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-F-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-F-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    li a0, 8
 ; CHECK-F-NEXT:    vminu.vx v8, v8, a0
@@ -176,12 +176,12 @@ define <vscale x 4 x i8> @ctlz_nxv4i8(<vscale x 4 x i8> %va) {
 ; CHECK-D-LABEL: ctlz_nxv4i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-D-NEXT:    vzext.vf2 v10, v8
+; CHECK-D-NEXT:    vzext.vf2 v9, v8
 ; CHECK-D-NEXT:    li a0, 134
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-D-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-D-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    li a0, 8
 ; CHECK-D-NEXT:    vminu.vx v8, v8, a0
@@ -225,12 +225,12 @@ define <vscale x 8 x i8> @ctlz_nxv8i8(<vscale x 8 x i8> %va) {
 ; CHECK-F-LABEL: ctlz_nxv8i8:
 ; CHECK-F:       # %bb.0:
 ; CHECK-F-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v12, v8
+; CHECK-F-NEXT:    vzext.vf2 v10, v8
 ; CHECK-F-NEXT:    li a0, 134
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-F-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-F-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    li a0, 8
 ; CHECK-F-NEXT:    vminu.vx v8, v8, a0
@@ -239,12 +239,12 @@ define <vscale x 8 x i8> @ctlz_nxv8i8(<vscale x 8 x i8> %va) {
 ; CHECK-D-LABEL: ctlz_nxv8i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-D-NEXT:    vzext.vf2 v12, v8
+; CHECK-D-NEXT:    vzext.vf2 v10, v8
 ; CHECK-D-NEXT:    li a0, 134
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-D-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-D-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-D-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    li a0, 8
 ; CHECK-D-NEXT:    vminu.vx v8, v8, a0
@@ -288,12 +288,12 @@ define <vscale x 16 x i8> @ctlz_nxv16i8(<vscale x 16 x i8> %va) {
 ; CHECK-F-LABEL: ctlz_nxv16i8:
 ; CHECK-F:       # %bb.0:
 ; CHECK-F-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v16, v8
+; CHECK-F-NEXT:    vzext.vf2 v12, v8
 ; CHECK-F-NEXT:    li a0, 134
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-F-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-F-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-F-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    li a0, 8
 ; CHECK-F-NEXT:    vminu.vx v8, v8, a0
@@ -302,12 +302,12 @@ define <vscale x 16 x i8> @ctlz_nxv16i8(<vscale x 16 x i8> %va) {
 ; CHECK-D-LABEL: ctlz_nxv16i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-D-NEXT:    vzext.vf2 v16, v8
+; CHECK-D-NEXT:    vzext.vf2 v12, v8
 ; CHECK-D-NEXT:    li a0, 134
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-D-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-D-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-D-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    li a0, 8
 ; CHECK-D-NEXT:    vminu.vx v8, v8, a0
@@ -1234,10 +1234,10 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) {
 ; CHECK-F-NEXT:    li a0, 190
 ; CHECK-F-NEXT:    fsrmi a1, 1
 ; CHECK-F-NEXT:    vsetvli a2, zero, e32, mf2, ta, ma
-; CHECK-F-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-F-NEXT:    vmv.v.x v8, a0
-; CHECK-F-NEXT:    vsrl.vi v9, v9, 23
-; CHECK-F-NEXT:    vwsubu.vv v10, v8, v9
+; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v8
+; CHECK-F-NEXT:    vmv.v.x v9, a0
+; CHECK-F-NEXT:    vsrl.vi v8, v8, 23
+; CHECK-F-NEXT:    vwsubu.vv v10, v9, v8
 ; CHECK-F-NEXT:    li a0, 64
 ; CHECK-F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
 ; CHECK-F-NEXT:    vminu.vx v8, v10, a0
@@ -1374,10 +1374,10 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
 ; CHECK-F-NEXT:    li a0, 190
 ; CHECK-F-NEXT:    fsrmi a1, 1
 ; CHECK-F-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-F-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-F-NEXT:    vmv.v.x v11, a0
-; CHECK-F-NEXT:    vsrl.vi v10, v10, 23
-; CHECK-F-NEXT:    vwsubu.vv v8, v11, v10
+; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v8
+; CHECK-F-NEXT:    vmv.v.x v9, a0
+; CHECK-F-NEXT:    vsrl.vi v10, v8, 23
+; CHECK-F-NEXT:    vwsubu.vv v8, v9, v10
 ; CHECK-F-NEXT:    li a0, 64
 ; CHECK-F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-F-NEXT:    vminu.vx v8, v8, a0
@@ -1514,10 +1514,10 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
 ; CHECK-F-NEXT:    li a0, 190
 ; CHECK-F-NEXT:    fsrmi a1, 1
 ; CHECK-F-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-F-NEXT:    vfncvt.f.xu.w v12, v8
-; CHECK-F-NEXT:    vmv.v.x v14, a0
-; CHECK-F-NEXT:    vsrl.vi v12, v12, 23
-; CHECK-F-NEXT:    vwsubu.vv v8, v14, v12
+; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v8
+; CHECK-F-NEXT:    vmv.v.x v10, a0
+; CHECK-F-NEXT:    vsrl.vi v12, v8, 23
+; CHECK-F-NEXT:    vwsubu.vv v8, v10, v12
 ; CHECK-F-NEXT:    li a0, 64
 ; CHECK-F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-F-NEXT:    vminu.vx v8, v8, a0
@@ -1654,10 +1654,10 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
 ; CHECK-F-NEXT:    li a0, 190
 ; CHECK-F-NEXT:    fsrmi a1, 1
 ; CHECK-F-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; CHECK-F-NEXT:    vfncvt.f.xu.w v16, v8
-; CHECK-F-NEXT:    vmv.v.x v20, a0
-; CHECK-F-NEXT:    vsrl.vi v16, v16, 23
-; CHECK-F-NEXT:    vwsubu.vv v8, v20, v16
+; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v8
+; CHECK-F-NEXT:    vmv.v.x v12, a0
+; CHECK-F-NEXT:    vsrl.vi v16, v8, 23
+; CHECK-F-NEXT:    vwsubu.vv v8, v12, v16
 ; CHECK-F-NEXT:    li a0, 64
 ; CHECK-F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-F-NEXT:    vminu.vx v8, v8, a0
@@ -1832,11 +1832,11 @@ define <vscale x 4 x i8> @ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv4i8:
 ; CHECK-F:       # %bb.0:
 ; CHECK-F-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v10, v8
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-F-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-F-NEXT:    vzext.vf2 v9, v8
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-F-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-F-NEXT:    li a0, 134
 ; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    ret
@@ -1844,11 +1844,11 @@ define <vscale x 4 x i8> @ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-D-NEXT:    vzext.vf2 v10, v8
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-D-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-D-NEXT:    vzext.vf2 v9, v8
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-D-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-D-NEXT:    li a0, 134
 ; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    ret
@@ -1890,11 +1890,11 @@ define <vscale x 8 x i8> @ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv8i8:
 ; CHECK-F:       # %bb.0:
 ; CHECK-F-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v12, v8
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-F-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-F-NEXT:    vzext.vf2 v10, v8
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-F-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-F-NEXT:    li a0, 134
 ; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    ret
@@ -1902,11 +1902,11 @@ define <vscale x 8 x i8> @ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv8i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-D-NEXT:    vzext.vf2 v12, v8
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-D-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-D-NEXT:    vzext.vf2 v10, v8
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-D-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-D-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-D-NEXT:    li a0, 134
 ; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    ret
@@ -1948,11 +1948,11 @@ define <vscale x 16 x i8> @ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv16i8:
 ; CHECK-F:       # %bb.0:
 ; CHECK-F-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v16, v8
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-F-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-F-NEXT:    vzext.vf2 v12, v8
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-F-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-F-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-F-NEXT:    li a0, 134
 ; CHECK-F-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    ret
@@ -1960,11 +1960,11 @@ define <vscale x 16 x i8> @ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv16i8:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-D-NEXT:    vzext.vf2 v16, v8
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-D-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-D-NEXT:    vzext.vf2 v12, v8
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-D-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-D-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-D-NEXT:    li a0, 134
 ; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    ret
@@ -2837,8 +2837,8 @@ define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
 ; CHECK-F-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; CHECK-F-NEXT:    vmv.v.x v9, a0
 ; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-F-NEXT:    vsrl.vi v10, v10, 23
+; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v8
+; CHECK-F-NEXT:    vsrl.vi v10, v8, 23
 ; CHECK-F-NEXT:    vwsubu.vv v8, v9, v10
 ; CHECK-F-NEXT:    fsrm a0
 ; CHECK-F-NEXT:    ret
@@ -2971,9 +2971,9 @@ define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
 ; CHECK-F-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-F-NEXT:    vmv.v.x v10, a0
 ; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v11, v8
-; CHECK-F-NEXT:    vsrl.vi v11, v11, 23
-; CHECK-F-NEXT:    vwsubu.vv v8, v10, v11
+; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v8
+; CHECK-F-NEXT:    vsrl.vi v9, v8, 23
+; CHECK-F-NEXT:    vwsubu.vv v8, v10, v9
 ; CHECK-F-NEXT:    fsrm a0
 ; CHECK-F-NEXT:    ret
 ;
@@ -3105,9 +3105,9 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
 ; CHECK-F-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-F-NEXT:    vmv.v.x v12, a0
 ; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v14, v8
-; CHECK-F-NEXT:    vsrl.vi v14, v14, 23
-; CHECK-F-NEXT:    vwsubu.vv v8, v12, v14
+; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v8
+; CHECK-F-NEXT:    vsrl.vi v10, v8, 23
+; CHECK-F-NEXT:    vwsubu.vv v8, v12, v10
 ; CHECK-F-NEXT:    fsrm a0
 ; CHECK-F-NEXT:    ret
 ;
@@ -3239,9 +3239,9 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
 ; CHECK-F-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-F-NEXT:    vmv.v.x v16, a0
 ; CHECK-F-NEXT:    fsrmi a0, 1
-; CHECK-F-NEXT:    vfncvt.f.xu.w v20, v8
-; CHECK-F-NEXT:    vsrl.vi v20, v20, 23
-; CHECK-F-NEXT:    vwsubu.vv v8, v16, v20
+; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v8
+; CHECK-F-NEXT:    vsrl.vi v12, v8, 23
+; CHECK-F-NEXT:    vwsubu.vv v8, v16, v12
 ; CHECK-F-NEXT:    fsrm a0
 ; CHECK-F-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
index 570ff34b0f23a..ce1c9b83f3753 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -120,15 +120,15 @@ define <vscale x 4 x i8> @vp_ctlz_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1
 ; CHECK-LABEL: vp_ctlz_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8, v0.t
+; CHECK-NEXT:    vzext.vf2 v9, v8, v0.t
 ; CHECK-NEXT:    li a0, 134
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    li a0, 8
 ; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
@@ -147,12 +147,12 @@ define <vscale x 4 x i8> @vp_ctlz_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zer
 ; CHECK-LABEL: vp_ctlz_nxv4i8_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v9, v8
 ; CHECK-NEXT:    li a0, 134
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-NEXT:    li a0, 8
 ; CHECK-NEXT:    vminu.vx v8, v8, a0
@@ -173,15 +173,15 @@ define <vscale x 8 x i8> @vp_ctlz_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1
 ; CHECK-LABEL: vp_ctlz_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8, v0.t
+; CHECK-NEXT:    vzext.vf2 v10, v8, v0.t
 ; CHECK-NEXT:    li a0, 134
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    li a0, 8
 ; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
@@ -200,12 +200,12 @@ define <vscale x 8 x i8> @vp_ctlz_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zer
 ; CHECK-LABEL: vp_ctlz_nxv8i8_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    li a0, 134
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-NEXT:    li a0, 8
 ; CHECK-NEXT:    vminu.vx v8, v8, a0
@@ -226,15 +226,15 @@ define <vscale x 16 x i8> @vp_ctlz_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
 ; CHECK-LABEL: vp_ctlz_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8, v0.t
+; CHECK-NEXT:    vzext.vf2 v12, v8, v0.t
 ; CHECK-NEXT:    li a0, 134
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    li a0, 8
 ; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
@@ -253,12 +253,12 @@ define <vscale x 16 x i8> @vp_ctlz_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32
 ; CHECK-LABEL: vp_ctlz_nxv16i8_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8
+; CHECK-NEXT:    vzext.vf2 v12, v8
 ; CHECK-NEXT:    li a0, 134
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-NEXT:    li a0, 8
 ; CHECK-NEXT:    vminu.vx v8, v8, a0
@@ -518,8 +518,8 @@ define <vscale x 4 x i16> @vp_ctlz_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v10, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
-; CHECK-NEXT:    vrsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    li a0, 16
 ; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
@@ -565,8 +565,8 @@ define <vscale x 8 x i16> @vp_ctlz_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v12, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
-; CHECK-NEXT:    vrsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    li a0, 16
 ; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
@@ -612,8 +612,8 @@ define <vscale x 16 x i16> @vp_ctlz_nxv16i16(<vscale x 16 x i16> %va, <vscale x
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v16, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT:    vrsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    li a0, 16
 ; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
@@ -797,8 +797,8 @@ define <vscale x 2 x i32> @vp_ctlz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x
 ; CHECK-NEXT:    vsrl.vx v8, v10, a0, v0.t
 ; CHECK-NEXT:    li a0, 1054
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
-; CHECK-NEXT:    vrsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
@@ -846,8 +846,8 @@ define <vscale x 4 x i32> @vp_ctlz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x
 ; CHECK-NEXT:    vsrl.vx v8, v12, a0, v0.t
 ; CHECK-NEXT:    li a0, 1054
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
-; CHECK-NEXT:    vrsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
@@ -895,8 +895,8 @@ define <vscale x 8 x i32> @vp_ctlz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x
 ; CHECK-NEXT:    vsrl.vx v8, v16, a0, v0.t
 ; CHECK-NEXT:    li a0, 1054
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT:    vrsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
@@ -1444,15 +1444,15 @@ define <vscale x 4 x i8> @vp_ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vsca
 ; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8, v0.t
+; CHECK-NEXT:    vzext.vf2 v9, v8, v0.t
 ; CHECK-NEXT:    li a0, 134
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -1469,11 +1469,11 @@ define <vscale x 4 x i8> @vp_ctlz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %
 ; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i8_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    li a0, 134
 ; CHECK-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
@@ -1492,15 +1492,15 @@ define <vscale x 8 x i8> @vp_ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vsca
 ; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8, v0.t
+; CHECK-NEXT:    vzext.vf2 v10, v8, v0.t
 ; CHECK-NEXT:    li a0, 134
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -1517,11 +1517,11 @@ define <vscale x 8 x i8> @vp_ctlz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %
 ; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i8_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    li a0, 134
 ; CHECK-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
@@ -1540,15 +1540,15 @@ define <vscale x 16 x i8> @vp_ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <v
 ; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8, v0.t
+; CHECK-NEXT:    vzext.vf2 v12, v8, v0.t
 ; CHECK-NEXT:    li a0, 134
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -1565,11 +1565,11 @@ define <vscale x 16 x i8> @vp_ctlz_zero_undef_nxv16i8_unmasked(<vscale x 16 x i8
 ; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i8_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    li a0, 134
 ; CHECK-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
@@ -1815,8 +1815,8 @@ define <vscale x 4 x i16> @vp_ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va, <v
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v10, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
-; CHECK-NEXT:    vrsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i16:
@@ -1857,8 +1857,8 @@ define <vscale x 8 x i16> @vp_ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va, <v
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v12, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
-; CHECK-NEXT:    vrsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i16:
@@ -1899,8 +1899,8 @@ define <vscale x 16 x i16> @vp_ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va,
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v16, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT:    vrsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i16:
@@ -2073,8 +2073,8 @@ define <vscale x 2 x i32> @vp_ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va, <v
 ; CHECK-NEXT:    vsrl.vx v8, v10, a0, v0.t
 ; CHECK-NEXT:    li a0, 1054
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
-; CHECK-NEXT:    vrsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i32:
@@ -2117,8 +2117,8 @@ define <vscale x 4 x i32> @vp_ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va, <v
 ; CHECK-NEXT:    vsrl.vx v8, v12, a0, v0.t
 ; CHECK-NEXT:    li a0, 1054
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
-; CHECK-NEXT:    vrsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i32:
@@ -2161,8 +2161,8 @@ define <vscale x 8 x i32> @vp_ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va, <v
 ; CHECK-NEXT:    vsrl.vx v8, v16, a0, v0.t
 ; CHECK-NEXT:    li a0, 1054
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT:    vrsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index faa3c48c49e50..6902edb5204bd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -166,8 +166,8 @@ define <vscale x 4 x i8> @cttz_nxv4i8(<vscale x 4 x i8> %va) {
 ; CHECK-F-NEXT:    li a0, 127
 ; CHECK-F-NEXT:    vand.vv v9, v8, v9
 ; CHECK-F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v12, v9
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v10, v12
+; CHECK-F-NEXT:    vzext.vf2 v11, v9
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v10, v11
 ; CHECK-F-NEXT:    vnsrl.wi v9, v10, 23
 ; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-F-NEXT:    vnsrl.wi v9, v9, 0
@@ -183,8 +183,8 @@ define <vscale x 4 x i8> @cttz_nxv4i8(<vscale x 4 x i8> %va) {
 ; CHECK-D-NEXT:    li a0, 127
 ; CHECK-D-NEXT:    vand.vv v9, v8, v9
 ; CHECK-D-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-D-NEXT:    vzext.vf2 v12, v9
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v12
+; CHECK-D-NEXT:    vzext.vf2 v11, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v11
 ; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
 ; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-D-NEXT:    vnsrl.wi v9, v9, 0
@@ -561,10 +561,10 @@ define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
 ; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-F-NEXT:    li a0, 127
 ; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-F-NEXT:    vand.vv v10, v8, v9
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-F-NEXT:    vnsrl.wi v10, v8, 23
-; CHECK-F-NEXT:    vsub.vx v8, v10, a0
+; CHECK-F-NEXT:    vand.vv v9, v8, v9
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    li a0, 16
 ; CHECK-F-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; CHECK-F-NEXT:    ret
@@ -575,10 +575,10 @@ define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
 ; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-D-NEXT:    li a0, 127
 ; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-D-NEXT:    vand.vv v10, v8, v9
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-D-NEXT:    vnsrl.wi v10, v8, 23
-; CHECK-D-NEXT:    vsub.vx v8, v10, a0
+; CHECK-D-NEXT:    vand.vv v9, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    li a0, 16
 ; CHECK-D-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; CHECK-D-NEXT:    ret
@@ -627,10 +627,10 @@ define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
 ; CHECK-F-NEXT:    vrsub.vi v10, v8, 0
 ; CHECK-F-NEXT:    li a0, 127
 ; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-F-NEXT:    vand.vv v12, v8, v10
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-F-NEXT:    vnsrl.wi v12, v8, 23
-; CHECK-F-NEXT:    vsub.vx v8, v12, a0
+; CHECK-F-NEXT:    vand.vv v10, v8, v10
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    li a0, 16
 ; CHECK-F-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; CHECK-F-NEXT:    ret
@@ -641,10 +641,10 @@ define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
 ; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
 ; CHECK-D-NEXT:    li a0, 127
 ; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-D-NEXT:    vand.vv v12, v8, v10
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-D-NEXT:    vnsrl.wi v12, v8, 23
-; CHECK-D-NEXT:    vsub.vx v8, v12, a0
+; CHECK-D-NEXT:    vand.vv v10, v8, v10
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    li a0, 16
 ; CHECK-D-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; CHECK-D-NEXT:    ret
@@ -693,10 +693,10 @@ define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
 ; CHECK-F-NEXT:    vrsub.vi v12, v8, 0
 ; CHECK-F-NEXT:    li a0, 127
 ; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-F-NEXT:    vand.vv v16, v8, v12
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-F-NEXT:    vnsrl.wi v16, v8, 23
-; CHECK-F-NEXT:    vsub.vx v8, v16, a0
+; CHECK-F-NEXT:    vand.vv v12, v8, v12
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    li a0, 16
 ; CHECK-F-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; CHECK-F-NEXT:    ret
@@ -707,10 +707,10 @@ define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
 ; CHECK-D-NEXT:    vrsub.vi v12, v8, 0
 ; CHECK-D-NEXT:    li a0, 127
 ; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-D-NEXT:    vand.vv v16, v8, v12
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-D-NEXT:    vnsrl.wi v16, v8, 23
-; CHECK-D-NEXT:    vsub.vx v8, v16, a0
+; CHECK-D-NEXT:    vand.vv v12, v8, v12
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    li a0, 16
 ; CHECK-D-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; CHECK-D-NEXT:    ret
@@ -1327,8 +1327,8 @@ define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) {
 ; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v10
-; CHECK-F-NEXT:    vsrl.vi v10, v8, 23
-; CHECK-F-NEXT:    vwsubu.vx v8, v10, a1
+; CHECK-F-NEXT:    vsrl.vi v9, v8, 23
+; CHECK-F-NEXT:    vwsubu.vx v8, v9, a1
 ; CHECK-F-NEXT:    li a1, 64
 ; CHECK-F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-F-NEXT:    vmerge.vxm v8, v8, a1, v0
@@ -1451,8 +1451,8 @@ define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) {
 ; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v12
-; CHECK-F-NEXT:    vsrl.vi v12, v8, 23
-; CHECK-F-NEXT:    vwsubu.vx v8, v12, a1
+; CHECK-F-NEXT:    vsrl.vi v10, v8, 23
+; CHECK-F-NEXT:    vwsubu.vx v8, v10, a1
 ; CHECK-F-NEXT:    li a1, 64
 ; CHECK-F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-F-NEXT:    vmerge.vxm v8, v8, a1, v0
@@ -1575,8 +1575,8 @@ define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
 ; CHECK-F-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v16
-; CHECK-F-NEXT:    vsrl.vi v16, v8, 23
-; CHECK-F-NEXT:    vwsubu.vx v8, v16, a1
+; CHECK-F-NEXT:    vsrl.vi v12, v8, 23
+; CHECK-F-NEXT:    vwsubu.vx v8, v12, a1
 ; CHECK-F-NEXT:    li a1, 64
 ; CHECK-F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-F-NEXT:    vmerge.vxm v8, v8, a1, v0
@@ -1757,11 +1757,11 @@ define <vscale x 4 x i8> @cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
 ; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-F-NEXT:    vand.vv v8, v8, v9
 ; CHECK-F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v10, v8
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-F-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-F-NEXT:    vzext.vf2 v9, v8
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-F-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-F-NEXT:    li a0, 127
 ; CHECK-F-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    ret
@@ -1772,11 +1772,11 @@ define <vscale x 4 x i8> @cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
 ; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-D-NEXT:    vand.vv v8, v8, v9
 ; CHECK-D-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-D-NEXT:    vzext.vf2 v10, v8
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-D-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-D-NEXT:    vzext.vf2 v9, v8
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-D-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-D-NEXT:    li a0, 127
 ; CHECK-D-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    ret
@@ -1817,11 +1817,11 @@ define <vscale x 8 x i8> @cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
 ; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-F-NEXT:    vand.vv v8, v8, v9
 ; CHECK-F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v12, v8
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-F-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-F-NEXT:    vzext.vf2 v10, v8
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-F-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-F-NEXT:    li a0, 127
 ; CHECK-F-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    ret
@@ -1832,11 +1832,11 @@ define <vscale x 8 x i8> @cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
 ; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-D-NEXT:    vand.vv v8, v8, v9
 ; CHECK-D-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-D-NEXT:    vzext.vf2 v12, v8
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-D-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-D-NEXT:    vzext.vf2 v10, v8
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-D-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-D-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-D-NEXT:    li a0, 127
 ; CHECK-D-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    ret
@@ -1877,11 +1877,11 @@ define <vscale x 16 x i8> @cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
 ; CHECK-F-NEXT:    vrsub.vi v10, v8, 0
 ; CHECK-F-NEXT:    vand.vv v8, v8, v10
 ; CHECK-F-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-F-NEXT:    vzext.vf2 v16, v8
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-F-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-F-NEXT:    vzext.vf2 v12, v8
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-F-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-F-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-F-NEXT:    li a0, 127
 ; CHECK-F-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    ret
@@ -1892,11 +1892,11 @@ define <vscale x 16 x i8> @cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
 ; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
 ; CHECK-D-NEXT:    vand.vv v8, v8, v10
 ; CHECK-D-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-D-NEXT:    vzext.vf2 v16, v8
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-D-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-D-NEXT:    vzext.vf2 v12, v8
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-D-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-D-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-D-NEXT:    li a0, 127
 ; CHECK-D-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    ret
@@ -2120,22 +2120,22 @@ define <vscale x 4 x i16> @cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
 ; CHECK-F:       # %bb.0:
 ; CHECK-F-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
-; CHECK-F-NEXT:    vand.vv v10, v8, v9
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-F-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-F-NEXT:    vand.vv v9, v8, v9
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-F-NEXT:    li a0, 127
-; CHECK-F-NEXT:    vsub.vx v8, v10, a0
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv4i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
-; CHECK-D-NEXT:    vand.vv v10, v8, v9
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-D-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-D-NEXT:    vand.vv v9, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-D-NEXT:    li a0, 127
-; CHECK-D-NEXT:    vsub.vx v8, v10, a0
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv4i16:
@@ -2179,22 +2179,22 @@ define <vscale x 8 x i16> @cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
 ; CHECK-F:       # %bb.0:
 ; CHECK-F-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-F-NEXT:    vrsub.vi v10, v8, 0
-; CHECK-F-NEXT:    vand.vv v12, v8, v10
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-F-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-F-NEXT:    vand.vv v10, v8, v10
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-F-NEXT:    li a0, 127
-; CHECK-F-NEXT:    vsub.vx v8, v12, a0
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv8i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
-; CHECK-D-NEXT:    vand.vv v12, v8, v10
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-D-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-D-NEXT:    vand.vv v10, v8, v10
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-D-NEXT:    li a0, 127
-; CHECK-D-NEXT:    vsub.vx v8, v12, a0
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv8i16:
@@ -2238,22 +2238,22 @@ define <vscale x 16 x i16> @cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
 ; CHECK-F:       # %bb.0:
 ; CHECK-F-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-F-NEXT:    vrsub.vi v12, v8, 0
-; CHECK-F-NEXT:    vand.vv v16, v8, v12
-; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-F-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-F-NEXT:    vand.vv v12, v8, v12
+; CHECK-F-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-F-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-F-NEXT:    li a0, 127
-; CHECK-F-NEXT:    vsub.vx v8, v16, a0
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv16i16:
 ; CHECK-D:       # %bb.0:
 ; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-D-NEXT:    vrsub.vi v12, v8, 0
-; CHECK-D-NEXT:    vand.vv v16, v8, v12
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-D-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-D-NEXT:    vand.vv v12, v8, v12
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-D-NEXT:    li a0, 127
-; CHECK-D-NEXT:    vsub.vx v8, v16, a0
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv16i16:
@@ -2412,11 +2412,11 @@ define <vscale x 2 x i32> @cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-D-NEXT:    li a0, 52
-; CHECK-D-NEXT:    vand.vv v10, v8, v9
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-D-NEXT:    vnsrl.wx v10, v8, a0
+; CHECK-D-NEXT:    vand.vv v9, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-D-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-D-NEXT:    li a0, 1023
-; CHECK-D-NEXT:    vsub.vx v8, v10, a0
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv2i32:
@@ -2475,11 +2475,11 @@ define <vscale x 4 x i32> @cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
 ; CHECK-D-NEXT:    li a0, 52
-; CHECK-D-NEXT:    vand.vv v12, v8, v10
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-D-NEXT:    vnsrl.wx v12, v8, a0
+; CHECK-D-NEXT:    vand.vv v10, v8, v10
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-D-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-D-NEXT:    li a0, 1023
-; CHECK-D-NEXT:    vsub.vx v8, v12, a0
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv4i32:
@@ -2538,11 +2538,11 @@ define <vscale x 8 x i32> @cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
 ; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-D-NEXT:    vrsub.vi v12, v8, 0
 ; CHECK-D-NEXT:    li a0, 52
-; CHECK-D-NEXT:    vand.vv v16, v8, v12
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-D-NEXT:    vnsrl.wx v16, v8, a0
+; CHECK-D-NEXT:    vand.vv v12, v8, v12
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-D-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-D-NEXT:    li a0, 1023
-; CHECK-D-NEXT:    vsub.vx v8, v16, a0
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-D-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv8i32:
@@ -2704,8 +2704,8 @@ define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
 ; CHECK-F-NEXT:    fsrmi a0, 1
 ; CHECK-F-NEXT:    vand.vv v8, v8, v9
 ; CHECK-F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-F-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-F-NEXT:    vsrl.vi v9, v9, 23
+; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v8
+; CHECK-F-NEXT:    vsrl.vi v9, v8, 23
 ; CHECK-F-NEXT:    li a1, 127
 ; CHECK-F-NEXT:    vwsubu.vx v8, v9, a1
 ; CHECK-F-NEXT:    fsrm a0
@@ -2820,10 +2820,10 @@ define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
 ; CHECK-F-NEXT:    fsrmi a0, 1
 ; CHECK-F-NEXT:    vand.vv v8, v8, v10
 ; CHECK-F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-F-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-F-NEXT:    vsrl.vi v10, v10, 23
+; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v8
+; CHECK-F-NEXT:    vsrl.vi v9, v8, 23
 ; CHECK-F-NEXT:    li a1, 127
-; CHECK-F-NEXT:    vwsubu.vx v8, v10, a1
+; CHECK-F-NEXT:    vwsubu.vx v8, v9, a1
 ; CHECK-F-NEXT:    fsrm a0
 ; CHECK-F-NEXT:    ret
 ;
@@ -2936,10 +2936,10 @@ define <vscale x 4 x i64> @cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
 ; CHECK-F-NEXT:    fsrmi a0, 1
 ; CHECK-F-NEXT:    vand.vv v8, v8, v12
 ; CHECK-F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-F-NEXT:    vfncvt.f.xu.w v12, v8
-; CHECK-F-NEXT:    vsrl.vi v12, v12, 23
+; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v8
+; CHECK-F-NEXT:    vsrl.vi v10, v8, 23
 ; CHECK-F-NEXT:    li a1, 127
-; CHECK-F-NEXT:    vwsubu.vx v8, v12, a1
+; CHECK-F-NEXT:    vwsubu.vx v8, v10, a1
 ; CHECK-F-NEXT:    fsrm a0
 ; CHECK-F-NEXT:    ret
 ;
@@ -3052,10 +3052,10 @@ define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
 ; CHECK-F-NEXT:    fsrmi a0, 1
 ; CHECK-F-NEXT:    vand.vv v8, v8, v16
 ; CHECK-F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-F-NEXT:    vfncvt.f.xu.w v16, v8
-; CHECK-F-NEXT:    vsrl.vi v16, v16, 23
+; CHECK-F-NEXT:    vfncvt.f.xu.w v8, v8
+; CHECK-F-NEXT:    vsrl.vi v12, v8, 23
 ; CHECK-F-NEXT:    li a1, 127
-; CHECK-F-NEXT:    vwsubu.vx v8, v16, a1
+; CHECK-F-NEXT:    vwsubu.vx v8, v12, a1
 ; CHECK-F-NEXT:    fsrm a0
 ; CHECK-F-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
index 6bf882fe47fef..bb00f5f4bc28d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -2727,14 +2727,14 @@ define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vsca
 ; CHECK-NEXT:    li a0, 127
 ; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT:    vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -2754,11 +2754,11 @@ define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %
 ; CHECK-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-NEXT:    vand.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    li a0, 127
 ; CHECK-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
@@ -2781,14 +2781,14 @@ define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vsca
 ; CHECK-NEXT:    li a0, 127
 ; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
+; CHECK-NEXT:    vzext.vf2 v10, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -2808,11 +2808,11 @@ define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %
 ; CHECK-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-NEXT:    vand.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    li a0, 127
 ; CHECK-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
@@ -2835,14 +2835,14 @@ define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <v
 ; CHECK-NEXT:    li a0, 127
 ; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16, v0.t
+; CHECK-NEXT:    vzext.vf2 v12, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
@@ -2862,11 +2862,11 @@ define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8_unmasked(<vscale x 16 x i8
 ; CHECK-NEXT:    vrsub.vi v10, v8, 0
 ; CHECK-NEXT:    vand.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    li a0, 127
 ; CHECK-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
@@ -3101,13 +3101,13 @@ define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va, <v
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
 ; CHECK-NEXT:    li a0, 127
-; CHECK-NEXT:    vand.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT:    vand.vv v9, v8, v9, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
-; CHECK-NEXT:    vsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16:
@@ -3124,11 +3124,11 @@ define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16_unmasked(<vscale x 4 x i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vrsub.vi v9, v8, 0
-; CHECK-NEXT:    vand.vv v10, v8, v9
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-NEXT:    vand.vv v9, v8, v9
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-NEXT:    li a0, 127
-; CHECK-NEXT:    vsub.vx v8, v10, a0
+; CHECK-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
@@ -3147,13 +3147,13 @@ define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va, <v
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vrsub.vi v10, v8, 0, v0.t
 ; CHECK-NEXT:    li a0, 127
-; CHECK-NEXT:    vand.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
+; CHECK-NEXT:    vand.vv v10, v8, v10, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
-; CHECK-NEXT:    vsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16:
@@ -3170,11 +3170,11 @@ define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16_unmasked(<vscale x 8 x i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vrsub.vi v10, v8, 0
-; CHECK-NEXT:    vand.vv v12, v8, v10
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-NEXT:    vand.vv v10, v8, v10
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-NEXT:    li a0, 127
-; CHECK-NEXT:    vsub.vx v8, v12, a0
+; CHECK-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
@@ -3193,13 +3193,13 @@ define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va,
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vrsub.vi v12, v8, 0, v0.t
 ; CHECK-NEXT:    li a0, 127
-; CHECK-NEXT:    vand.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16, v0.t
+; CHECK-NEXT:    vand.vv v12, v8, v12, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT:    vsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16:
@@ -3216,11 +3216,11 @@ define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16_unmasked(<vscale x 16 x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vrsub.vi v12, v8, 0
-; CHECK-NEXT:    vand.vv v16, v8, v12
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-NEXT:    vand.vv v12, v8, v12
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
 ; CHECK-NEXT:    li a0, 127
-; CHECK-NEXT:    vsub.vx v8, v16, a0
+; CHECK-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
@@ -3362,14 +3362,14 @@ define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va, <v
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
 ; CHECK-NEXT:    li a0, 52
-; CHECK-NEXT:    vand.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT:    vand.vv v9, v8, v9, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    li a0, 1023
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
-; CHECK-NEXT:    vsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32:
@@ -3387,11 +3387,11 @@ define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32_unmasked(<vscale x 2 x i32
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-NEXT:    li a0, 52
-; CHECK-NEXT:    vand.vv v10, v8, v9
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
-; CHECK-NEXT:    vnsrl.wx v10, v8, a0
+; CHECK-NEXT:    vand.vv v9, v8, v9
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    li a0, 1023
-; CHECK-NEXT:    vsub.vx v8, v10, a0
+; CHECK-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked:
@@ -3410,14 +3410,14 @@ define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va, <v
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vrsub.vi v10, v8, 0, v0.t
 ; CHECK-NEXT:    li a0, 52
-; CHECK-NEXT:    vand.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
+; CHECK-NEXT:    vand.vv v10, v8, v10, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    li a0, 1023
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
-; CHECK-NEXT:    vsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32:
@@ -3435,11 +3435,11 @@ define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32_unmasked(<vscale x 4 x i32
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vrsub.vi v10, v8, 0
 ; CHECK-NEXT:    li a0, 52
-; CHECK-NEXT:    vand.vv v12, v8, v10
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
-; CHECK-NEXT:    vnsrl.wx v12, v8, a0
+; CHECK-NEXT:    vand.vv v10, v8, v10
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    li a0, 1023
-; CHECK-NEXT:    vsub.vx v8, v12, a0
+; CHECK-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked:
@@ -3458,14 +3458,14 @@ define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va, <v
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT:    vrsub.vi v12, v8, 0, v0.t
 ; CHECK-NEXT:    li a0, 52
-; CHECK-NEXT:    vand.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16, v0.t
+; CHECK-NEXT:    vand.vv v12, v8, v12, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    li a0, 1023
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT:    vsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32:
@@ -3483,11 +3483,11 @@ define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32_unmasked(<vscale x 8 x i32
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT:    vrsub.vi v12, v8, 0
 ; CHECK-NEXT:    li a0, 52
-; CHECK-NEXT:    vand.vv v16, v8, v12
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
-; CHECK-NEXT:    vnsrl.wx v16, v8, a0
+; CHECK-NEXT:    vand.vv v12, v8, v12
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    li a0, 1023
-; CHECK-NEXT:    vsub.vx v8, v16, a0
+; CHECK-NEXT:    vsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
index 8c63c2d4be8c1..6b57c66196dcb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
@@ -23,9 +23,9 @@ define <vscale x 1 x i8> @trunc_nxv1f64_to_si8(<vscale x 1 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
@@ -42,9 +42,9 @@ define <vscale x 1 x i8> @trunc_nxv1f64_to_si8(<vscale x 1 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
@@ -66,9 +66,9 @@ define <vscale x 1 x i8> @trunc_nxv1f64_to_ui8(<vscale x 1 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
@@ -85,9 +85,9 @@ define <vscale x 1 x i8> @trunc_nxv1f64_to_ui8(<vscale x 1 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
@@ -109,9 +109,9 @@ define <vscale x 1 x i16> @trunc_nxv1f64_to_si16(<vscale x 1 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f64_to_si16:
@@ -126,9 +126,9 @@ define <vscale x 1 x i16> @trunc_nxv1f64_to_si16(<vscale x 1 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
   %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i16>
@@ -148,9 +148,9 @@ define <vscale x 1 x i16> @trunc_nxv1f64_to_ui16(<vscale x 1 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f64_to_ui16:
@@ -165,9 +165,9 @@ define <vscale x 1 x i16> @trunc_nxv1f64_to_ui16(<vscale x 1 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
   %b = fptoui <vscale x 1 x double> %a to <vscale x 1 x i16>
@@ -178,15 +178,13 @@ define <vscale x 1 x i32> @trunc_nxv1f64_to_si32(<vscale x 1 x double> %x) {
 ; RV32-LABEL: trunc_nxv1f64_to_si32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f64_to_si32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
   %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i32>
@@ -197,15 +195,13 @@ define <vscale x 1 x i32> @trunc_nxv1f64_to_ui32(<vscale x 1 x double> %x) {
 ; RV32-LABEL: trunc_nxv1f64_to_ui32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f64_to_ui32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
   %b = fptoui <vscale x 1 x double> %a to <vscale x 1 x i32>
@@ -265,9 +261,9 @@ define <vscale x 4 x i8> @trunc_nxv4f64_to_si8(<vscale x 4 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
@@ -284,9 +280,9 @@ define <vscale x 4 x i8> @trunc_nxv4f64_to_si8(<vscale x 4 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
@@ -308,9 +304,9 @@ define <vscale x 4 x i8> @trunc_nxv4f64_to_ui8(<vscale x 4 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
@@ -327,9 +323,9 @@ define <vscale x 4 x i8> @trunc_nxv4f64_to_ui8(<vscale x 4 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
@@ -351,9 +347,9 @@ define <vscale x 4 x i16> @trunc_nxv4f64_to_si16(<vscale x 4 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f64_to_si16:
@@ -368,9 +364,9 @@ define <vscale x 4 x i16> @trunc_nxv4f64_to_si16(<vscale x 4 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
   %b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i16>
@@ -390,9 +386,9 @@ define <vscale x 4 x i16> @trunc_nxv4f64_to_ui16(<vscale x 4 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f64_to_ui16:
@@ -407,9 +403,9 @@ define <vscale x 4 x i16> @trunc_nxv4f64_to_ui16(<vscale x 4 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
   %b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i16>
@@ -420,15 +416,13 @@ define <vscale x 4 x i32> @trunc_nxv4f64_to_si32(<vscale x 4 x double> %x) {
 ; RV32-LABEL: trunc_nxv4f64_to_si32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f64_to_si32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
   %b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i32>
@@ -439,15 +433,13 @@ define <vscale x 4 x i32> @trunc_nxv4f64_to_ui32(<vscale x 4 x double> %x) {
 ; RV32-LABEL: trunc_nxv4f64_to_ui32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f64_to_ui32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
   %b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i32>
@@ -509,9 +501,9 @@ define <vscale x 1 x i8> @ceil_nxv1f64_to_si8(<vscale x 1 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
@@ -530,9 +522,9 @@ define <vscale x 1 x i8> @ceil_nxv1f64_to_si8(<vscale x 1 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
@@ -556,9 +548,9 @@ define <vscale x 1 x i8> @ceil_nxv1f64_to_ui8(<vscale x 1 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
@@ -577,9 +569,9 @@ define <vscale x 1 x i8> @ceil_nxv1f64_to_ui8(<vscale x 1 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
@@ -603,9 +595,9 @@ define <vscale x 1 x i16> @ceil_nxv1f64_to_si16(<vscale x 1 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f64_to_si16:
@@ -622,9 +614,9 @@ define <vscale x 1 x i16> @ceil_nxv1f64_to_si16(<vscale x 1 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
   %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i16>
@@ -646,9 +638,9 @@ define <vscale x 1 x i16> @ceil_nxv1f64_to_ui16(<vscale x 1 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f64_to_ui16:
@@ -665,9 +657,9 @@ define <vscale x 1 x i16> @ceil_nxv1f64_to_ui16(<vscale x 1 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
   %b = fptoui <vscale x 1 x double> %a to <vscale x 1 x i16>
@@ -679,18 +671,16 @@ define <vscale x 1 x i32> @ceil_nxv1f64_to_si32(<vscale x 1 x double> %x) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    fsrmi a0, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v9, v8
+; RV32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f64_to_si32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.x.f.w v9, v8
+; RV64-NEXT:    vfncvt.x.f.w v8, v8
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
   %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i32>
@@ -702,18 +692,16 @@ define <vscale x 1 x i32> @ceil_nxv1f64_to_ui32(<vscale x 1 x double> %x) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    fsrmi a0, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.xu.f.w v9, v8
+; RV32-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f64_to_ui32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.xu.f.w v9, v8
+; RV64-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
   %b = fptoui <vscale x 1 x double> %a to <vscale x 1 x i32>
@@ -783,9 +771,9 @@ define <vscale x 4 x i8> @ceil_nxv4f64_to_si8(<vscale x 4 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
@@ -804,9 +792,9 @@ define <vscale x 4 x i8> @ceil_nxv4f64_to_si8(<vscale x 4 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
@@ -830,9 +818,9 @@ define <vscale x 4 x i8> @ceil_nxv4f64_to_ui8(<vscale x 4 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
@@ -851,9 +839,9 @@ define <vscale x 4 x i8> @ceil_nxv4f64_to_ui8(<vscale x 4 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
@@ -877,9 +865,9 @@ define <vscale x 4 x i16> @ceil_nxv4f64_to_si16(<vscale x 4 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f64_to_si16:
@@ -896,9 +884,9 @@ define <vscale x 4 x i16> @ceil_nxv4f64_to_si16(<vscale x 4 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
   %b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i16>
@@ -920,9 +908,9 @@ define <vscale x 4 x i16> @ceil_nxv4f64_to_ui16(<vscale x 4 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f64_to_ui16:
@@ -939,9 +927,9 @@ define <vscale x 4 x i16> @ceil_nxv4f64_to_ui16(<vscale x 4 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
   %b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i16>
@@ -953,18 +941,16 @@ define <vscale x 4 x i32> @ceil_nxv4f64_to_si32(<vscale x 4 x double> %x) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    fsrmi a0, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v12, v8
+; RV32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vmv.v.v v8, v12
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f64_to_si32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.x.f.w v12, v8
+; RV64-NEXT:    vfncvt.x.f.w v8, v8
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vmv.v.v v8, v12
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
   %b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i32>
@@ -976,18 +962,16 @@ define <vscale x 4 x i32> @ceil_nxv4f64_to_ui32(<vscale x 4 x double> %x) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    fsrmi a0, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.xu.f.w v12, v8
+; RV32-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vmv.v.v v8, v12
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f64_to_ui32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.xu.f.w v12, v8
+; RV64-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vmv.v.v v8, v12
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
   %b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i32>
@@ -1055,9 +1039,9 @@ define <vscale x 1 x i8> @rint_nxv1f64_to_si8(<vscale x 1 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
@@ -1074,9 +1058,9 @@ define <vscale x 1 x i8> @rint_nxv1f64_to_si8(<vscale x 1 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
@@ -1098,9 +1082,9 @@ define <vscale x 1 x i8> @rint_nxv1f64_to_ui8(<vscale x 1 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
@@ -1117,9 +1101,9 @@ define <vscale x 1 x i8> @rint_nxv1f64_to_ui8(<vscale x 1 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
@@ -1141,9 +1125,9 @@ define <vscale x 1 x i16> @rint_nxv1f64_to_si16(<vscale x 1 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv1f64_to_si16:
@@ -1158,9 +1142,9 @@ define <vscale x 1 x i16> @rint_nxv1f64_to_si16(<vscale x 1 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double> %x)
   %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i16>
@@ -1180,9 +1164,9 @@ define <vscale x 1 x i16> @rint_nxv1f64_to_ui16(<vscale x 1 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv1f64_to_ui16:
@@ -1197,9 +1181,9 @@ define <vscale x 1 x i16> @rint_nxv1f64_to_ui16(<vscale x 1 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double> %x)
   %b = fptoui <vscale x 1 x double> %a to <vscale x 1 x i16>
@@ -1210,15 +1194,13 @@ define <vscale x 1 x i32> @rint_nxv1f64_to_si32(<vscale x 1 x double> %x) {
 ; RV32-LABEL: rint_nxv1f64_to_si32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v9, v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv1f64_to_si32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.x.f.w v9, v8
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vfncvt.x.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double> %x)
   %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i32>
@@ -1229,15 +1211,13 @@ define <vscale x 1 x i32> @rint_nxv1f64_to_ui32(<vscale x 1 x double> %x) {
 ; RV32-LABEL: rint_nxv1f64_to_ui32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.xu.f.w v9, v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv1f64_to_ui32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.xu.f.w v9, v8
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double> %x)
   %b = fptoui <vscale x 1 x double> %a to <vscale x 1 x i32>
@@ -1297,9 +1277,9 @@ define <vscale x 4 x i8> @rint_nxv4f64_to_si8(<vscale x 4 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
@@ -1316,9 +1296,9 @@ define <vscale x 4 x i8> @rint_nxv4f64_to_si8(<vscale x 4 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
@@ -1340,9 +1320,9 @@ define <vscale x 4 x i8> @rint_nxv4f64_to_ui8(<vscale x 4 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
@@ -1359,9 +1339,9 @@ define <vscale x 4 x i8> @rint_nxv4f64_to_ui8(<vscale x 4 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
@@ -1383,9 +1363,9 @@ define <vscale x 4 x i16> @rint_nxv4f64_to_si16(<vscale x 4 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv4f64_to_si16:
@@ -1400,9 +1380,9 @@ define <vscale x 4 x i16> @rint_nxv4f64_to_si16(<vscale x 4 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.rint.nxv4f64(<vscale x 4 x double> %x)
   %b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i16>
@@ -1422,9 +1402,9 @@ define <vscale x 4 x i16> @rint_nxv4f64_to_ui16(<vscale x 4 x double> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv4f64_to_ui16:
@@ -1439,9 +1419,9 @@ define <vscale x 4 x i16> @rint_nxv4f64_to_ui16(<vscale x 4 x double> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.rint.nxv4f64(<vscale x 4 x double> %x)
   %b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i16>
@@ -1452,15 +1432,13 @@ define <vscale x 4 x i32> @rint_nxv4f64_to_si32(<vscale x 4 x double> %x) {
 ; RV32-LABEL: rint_nxv4f64_to_si32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v12, v8
-; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv4f64_to_si32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.x.f.w v12, v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vfncvt.x.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.rint.nxv4f64(<vscale x 4 x double> %x)
   %b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i32>
@@ -1471,15 +1449,13 @@ define <vscale x 4 x i32> @rint_nxv4f64_to_ui32(<vscale x 4 x double> %x) {
 ; RV32-LABEL: rint_nxv4f64_to_ui32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.xu.f.w v12, v8
-; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv4f64_to_ui32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.xu.f.w v12, v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.rint.nxv4f64(<vscale x 4 x double> %x)
   %b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
index 63fd13d98c7ad..abeb5a8d4a4bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
@@ -234,9 +234,9 @@ define <vscale x 4 x i64> @zextload_nxv4i8_nxv4i64(ptr %x) {
 define <vscale x 8 x i16> @sextload_nxv8i8_nxv8i16(ptr %x) {
 ; CHECK-LABEL: sextload_nxv8i8_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1r.v v10, (a0)
+; CHECK-NEXT:    vl1r.v v9, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x i8>, ptr %x
   %z = sext <vscale x 8 x i8> %y to <vscale x 8 x i16>
@@ -246,9 +246,9 @@ define <vscale x 8 x i16> @sextload_nxv8i8_nxv8i16(ptr %x) {
 define <vscale x 8 x i16> @zextload_nxv8i8_nxv8i16(ptr %x) {
 ; CHECK-LABEL: zextload_nxv8i8_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1r.v v10, (a0)
+; CHECK-NEXT:    vl1r.v v9, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x i8>, ptr %x
   %z = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
@@ -258,9 +258,9 @@ define <vscale x 8 x i16> @zextload_nxv8i8_nxv8i16(ptr %x) {
 define <vscale x 8 x i32> @sextload_nxv8i8_nxv8i32(ptr %x) {
 ; CHECK-LABEL: sextload_nxv8i8_nxv8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1r.v v12, (a0)
+; CHECK-NEXT:    vl1r.v v11, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    vsext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x i8>, ptr %x
   %z = sext <vscale x 8 x i8> %y to <vscale x 8 x i32>
@@ -270,9 +270,9 @@ define <vscale x 8 x i32> @sextload_nxv8i8_nxv8i32(ptr %x) {
 define <vscale x 8 x i32> @zextload_nxv8i8_nxv8i32(ptr %x) {
 ; CHECK-LABEL: zextload_nxv8i8_nxv8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1r.v v12, (a0)
+; CHECK-NEXT:    vl1r.v v11, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x i8>, ptr %x
   %z = zext <vscale x 8 x i8> %y to <vscale x 8 x i32>
@@ -282,9 +282,9 @@ define <vscale x 8 x i32> @zextload_nxv8i8_nxv8i32(ptr %x) {
 define <vscale x 8 x i64> @sextload_nxv8i8_nxv8i64(ptr %x) {
 ; CHECK-LABEL: sextload_nxv8i8_nxv8i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1r.v v16, (a0)
+; CHECK-NEXT:    vl1r.v v15, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v8, v16
+; CHECK-NEXT:    vsext.vf8 v8, v15
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x i8>, ptr %x
   %z = sext <vscale x 8 x i8> %y to <vscale x 8 x i64>
@@ -294,9 +294,9 @@ define <vscale x 8 x i64> @sextload_nxv8i8_nxv8i64(ptr %x) {
 define <vscale x 8 x i64> @zextload_nxv8i8_nxv8i64(ptr %x) {
 ; CHECK-LABEL: zextload_nxv8i8_nxv8i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1r.v v16, (a0)
+; CHECK-NEXT:    vl1r.v v15, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf8 v8, v16
+; CHECK-NEXT:    vzext.vf8 v8, v15
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x i8>, ptr %x
   %z = zext <vscale x 8 x i8> %y to <vscale x 8 x i64>
@@ -306,9 +306,9 @@ define <vscale x 8 x i64> @zextload_nxv8i8_nxv8i64(ptr %x) {
 define <vscale x 16 x i16> @sextload_nxv16i8_nxv16i16(ptr %x) {
 ; CHECK-LABEL: sextload_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl2r.v v12, (a0)
+; CHECK-NEXT:    vl2r.v v10, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %y = load <vscale x 16 x i8>, ptr %x
   %z = sext <vscale x 16 x i8> %y to <vscale x 16 x i16>
@@ -318,9 +318,9 @@ define <vscale x 16 x i16> @sextload_nxv16i8_nxv16i16(ptr %x) {
 define <vscale x 16 x i16> @zextload_nxv16i8_nxv16i16(ptr %x) {
 ; CHECK-LABEL: zextload_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl2r.v v12, (a0)
+; CHECK-NEXT:    vl2r.v v10, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %y = load <vscale x 16 x i8>, ptr %x
   %z = zext <vscale x 16 x i8> %y to <vscale x 16 x i16>
@@ -330,9 +330,9 @@ define <vscale x 16 x i16> @zextload_nxv16i8_nxv16i16(ptr %x) {
 define <vscale x 16 x i32> @sextload_nxv16i8_nxv16i32(ptr %x) {
 ; CHECK-LABEL: sextload_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl2r.v v16, (a0)
+; CHECK-NEXT:    vl2r.v v14, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    vsext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %y = load <vscale x 16 x i8>, ptr %x
   %z = sext <vscale x 16 x i8> %y to <vscale x 16 x i32>
@@ -342,9 +342,9 @@ define <vscale x 16 x i32> @sextload_nxv16i8_nxv16i32(ptr %x) {
 define <vscale x 16 x i32> @zextload_nxv16i8_nxv16i32(ptr %x) {
 ; CHECK-LABEL: zextload_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl2r.v v16, (a0)
+; CHECK-NEXT:    vl2r.v v14, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v16
+; CHECK-NEXT:    vzext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %y = load <vscale x 16 x i8>, ptr %x
   %z = zext <vscale x 16 x i8> %y to <vscale x 16 x i32>
@@ -354,9 +354,9 @@ define <vscale x 16 x i32> @zextload_nxv16i8_nxv16i32(ptr %x) {
 define <vscale x 32 x i16> @sextload_nxv32i8_nxv32i16(ptr %x) {
 ; CHECK-LABEL: sextload_nxv32i8_nxv32i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl4r.v v16, (a0)
+; CHECK-NEXT:    vl4r.v v12, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    vsext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %y = load <vscale x 32 x i8>, ptr %x
   %z = sext <vscale x 32 x i8> %y to <vscale x 32 x i16>
@@ -366,9 +366,9 @@ define <vscale x 32 x i16> @sextload_nxv32i8_nxv32i16(ptr %x) {
 define <vscale x 32 x i16> @zextload_nxv32i8_nxv32i16(ptr %x) {
 ; CHECK-LABEL: zextload_nxv32i8_nxv32i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl4r.v v16, (a0)
+; CHECK-NEXT:    vl4r.v v12, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %y = load <vscale x 32 x i8>, ptr %x
   %z = zext <vscale x 32 x i8> %y to <vscale x 32 x i16>
@@ -523,9 +523,9 @@ define void @truncstore_nxv4i16_nxv4i8(<vscale x 4 x i16> %x, ptr %z) {
 define <vscale x 4 x i32> @sextload_nxv4i16_nxv4i32(ptr %x) {
 ; CHECK-LABEL: sextload_nxv4i16_nxv4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1re16.v v10, (a0)
+; CHECK-NEXT:    vl1re16.v v9, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %y = load <vscale x 4 x i16>, ptr %x
   %z = sext <vscale x 4 x i16> %y to <vscale x 4 x i32>
@@ -535,9 +535,9 @@ define <vscale x 4 x i32> @sextload_nxv4i16_nxv4i32(ptr %x) {
 define <vscale x 4 x i32> @zextload_nxv4i16_nxv4i32(ptr %x) {
 ; CHECK-LABEL: zextload_nxv4i16_nxv4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1re16.v v10, (a0)
+; CHECK-NEXT:    vl1re16.v v9, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %y = load <vscale x 4 x i16>, ptr %x
   %z = zext <vscale x 4 x i16> %y to <vscale x 4 x i32>
@@ -547,9 +547,9 @@ define <vscale x 4 x i32> @zextload_nxv4i16_nxv4i32(ptr %x) {
 define <vscale x 4 x i64> @sextload_nxv4i16_nxv4i64(ptr %x) {
 ; CHECK-LABEL: sextload_nxv4i16_nxv4i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1re16.v v12, (a0)
+; CHECK-NEXT:    vl1re16.v v11, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    vsext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %y = load <vscale x 4 x i16>, ptr %x
   %z = sext <vscale x 4 x i16> %y to <vscale x 4 x i64>
@@ -559,9 +559,9 @@ define <vscale x 4 x i64> @sextload_nxv4i16_nxv4i64(ptr %x) {
 define <vscale x 4 x i64> @zextload_nxv4i16_nxv4i64(ptr %x) {
 ; CHECK-LABEL: zextload_nxv4i16_nxv4i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1re16.v v12, (a0)
+; CHECK-NEXT:    vl1re16.v v11, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %y = load <vscale x 4 x i16>, ptr %x
   %z = zext <vscale x 4 x i16> %y to <vscale x 4 x i64>
@@ -572,8 +572,8 @@ define void @truncstore_nxv8i16_nxv8i8(<vscale x 8 x i16> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vs1r.v v10, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 8 x i16> %x to <vscale x 8 x i8>
   store <vscale x 8 x i8> %y, ptr %z
@@ -583,9 +583,9 @@ define void @truncstore_nxv8i16_nxv8i8(<vscale x 8 x i16> %x, ptr %z) {
 define <vscale x 8 x i32> @sextload_nxv8i16_nxv8i32(ptr %x) {
 ; CHECK-LABEL: sextload_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl2re16.v v12, (a0)
+; CHECK-NEXT:    vl2re16.v v10, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x i16>, ptr %x
   %z = sext <vscale x 8 x i16> %y to <vscale x 8 x i32>
@@ -595,9 +595,9 @@ define <vscale x 8 x i32> @sextload_nxv8i16_nxv8i32(ptr %x) {
 define <vscale x 8 x i32> @zextload_nxv8i16_nxv8i32(ptr %x) {
 ; CHECK-LABEL: zextload_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl2re16.v v12, (a0)
+; CHECK-NEXT:    vl2re16.v v10, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x i16>, ptr %x
   %z = zext <vscale x 8 x i16> %y to <vscale x 8 x i32>
@@ -607,9 +607,9 @@ define <vscale x 8 x i32> @zextload_nxv8i16_nxv8i32(ptr %x) {
 define <vscale x 8 x i64> @sextload_nxv8i16_nxv8i64(ptr %x) {
 ; CHECK-LABEL: sextload_nxv8i16_nxv8i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl2re16.v v16, (a0)
+; CHECK-NEXT:    vl2re16.v v14, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    vsext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x i16>, ptr %x
   %z = sext <vscale x 8 x i16> %y to <vscale x 8 x i64>
@@ -619,9 +619,9 @@ define <vscale x 8 x i64> @sextload_nxv8i16_nxv8i64(ptr %x) {
 define <vscale x 8 x i64> @zextload_nxv8i16_nxv8i64(ptr %x) {
 ; CHECK-LABEL: zextload_nxv8i16_nxv8i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl2re16.v v16, (a0)
+; CHECK-NEXT:    vl2re16.v v14, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v16
+; CHECK-NEXT:    vzext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x i16>, ptr %x
   %z = zext <vscale x 8 x i16> %y to <vscale x 8 x i64>
@@ -632,8 +632,8 @@ define void @truncstore_nxv16i16_nxv16i8(<vscale x 16 x i16> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
-; CHECK-NEXT:    vs2r.v v12, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 16 x i16> %x to <vscale x 16 x i8>
   store <vscale x 16 x i8> %y, ptr %z
@@ -643,9 +643,9 @@ define void @truncstore_nxv16i16_nxv16i8(<vscale x 16 x i16> %x, ptr %z) {
 define <vscale x 16 x i32> @sextload_nxv16i16_nxv16i32(ptr %x) {
 ; CHECK-LABEL: sextload_nxv16i16_nxv16i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl4re16.v v16, (a0)
+; CHECK-NEXT:    vl4re16.v v12, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    vsext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %y = load <vscale x 16 x i16>, ptr %x
   %z = sext <vscale x 16 x i16> %y to <vscale x 16 x i32>
@@ -655,9 +655,9 @@ define <vscale x 16 x i32> @sextload_nxv16i16_nxv16i32(ptr %x) {
 define <vscale x 16 x i32> @zextload_nxv16i16_nxv16i32(ptr %x) {
 ; CHECK-LABEL: zextload_nxv16i16_nxv16i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl4re16.v v16, (a0)
+; CHECK-NEXT:    vl4re16.v v12, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %y = load <vscale x 16 x i16>, ptr %x
   %z = zext <vscale x 16 x i16> %y to <vscale x 16 x i32>
@@ -668,8 +668,8 @@ define void @truncstore_nxv32i16_nxv32i8(<vscale x 32 x i16> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
-; CHECK-NEXT:    vs4r.v v16, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs4r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 32 x i16> %x to <vscale x 32 x i8>
   store <vscale x 32 x i8> %y, ptr %z
@@ -755,9 +755,9 @@ define void @truncstore_nxv2i32_nxv2i16(<vscale x 2 x i32> %x, ptr %z) {
 define <vscale x 2 x i64> @sextload_nxv2i32_nxv2i64(ptr %x) {
 ; CHECK-LABEL: sextload_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1re32.v v10, (a0)
+; CHECK-NEXT:    vl1re32.v v9, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %y = load <vscale x 2 x i32>, ptr %x
   %z = sext <vscale x 2 x i32> %y to <vscale x 2 x i64>
@@ -767,9 +767,9 @@ define <vscale x 2 x i64> @sextload_nxv2i32_nxv2i64(ptr %x) {
 define <vscale x 2 x i64> @zextload_nxv2i32_nxv2i64(ptr %x) {
 ; CHECK-LABEL: zextload_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1re32.v v10, (a0)
+; CHECK-NEXT:    vl1re32.v v9, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %y = load <vscale x 2 x i32>, ptr %x
   %z = zext <vscale x 2 x i32> %y to <vscale x 2 x i64>
@@ -780,9 +780,9 @@ define void @truncstore_nxv4i32_nxv4i8(<vscale x 4 x i32> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv4i32_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 4 x i32> %x to <vscale x 4 x i8>
@@ -794,8 +794,8 @@ define void @truncstore_nxv4i32_nxv4i16(<vscale x 4 x i32> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vs1r.v v10, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 4 x i32> %x to <vscale x 4 x i16>
   store <vscale x 4 x i16> %y, ptr %z
@@ -805,9 +805,9 @@ define void @truncstore_nxv4i32_nxv4i16(<vscale x 4 x i32> %x, ptr %z) {
 define <vscale x 4 x i64> @sextload_nxv4i32_nxv4i64(ptr %x) {
 ; CHECK-LABEL: sextload_nxv4i32_nxv4i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl2re32.v v12, (a0)
+; CHECK-NEXT:    vl2re32.v v10, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %y = load <vscale x 4 x i32>, ptr %x
   %z = sext <vscale x 4 x i32> %y to <vscale x 4 x i64>
@@ -817,9 +817,9 @@ define <vscale x 4 x i64> @sextload_nxv4i32_nxv4i64(ptr %x) {
 define <vscale x 4 x i64> @zextload_nxv4i32_nxv4i64(ptr %x) {
 ; CHECK-LABEL: zextload_nxv4i32_nxv4i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl2re32.v v12, (a0)
+; CHECK-NEXT:    vl2re32.v v10, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %y = load <vscale x 4 x i32>, ptr %x
   %z = zext <vscale x 4 x i32> %y to <vscale x 4 x i64>
@@ -830,9 +830,9 @@ define void @truncstore_nxv8i32_nxv8i8(<vscale x 8 x i32> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv8i32_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 8 x i32> %x to <vscale x 8 x i8>
@@ -844,8 +844,8 @@ define void @truncstore_nxv8i32_nxv8i16(<vscale x 8 x i32> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
-; CHECK-NEXT:    vs2r.v v12, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 8 x i32> %x to <vscale x 8 x i16>
   store <vscale x 8 x i16> %y, ptr %z
@@ -855,9 +855,9 @@ define void @truncstore_nxv8i32_nxv8i16(<vscale x 8 x i32> %x, ptr %z) {
 define <vscale x 8 x i64> @sextload_nxv8i32_nxv8i64(ptr %x) {
 ; CHECK-LABEL: sextload_nxv8i32_nxv8i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl4re32.v v16, (a0)
+; CHECK-NEXT:    vl4re32.v v12, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    vsext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x i32>, ptr %x
   %z = sext <vscale x 8 x i32> %y to <vscale x 8 x i64>
@@ -867,9 +867,9 @@ define <vscale x 8 x i64> @sextload_nxv8i32_nxv8i64(ptr %x) {
 define <vscale x 8 x i64> @zextload_nxv8i32_nxv8i64(ptr %x) {
 ; CHECK-LABEL: zextload_nxv8i32_nxv8i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl4re32.v v16, (a0)
+; CHECK-NEXT:    vl4re32.v v12, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x i32>, ptr %x
   %z = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
@@ -880,9 +880,9 @@ define void @truncstore_nxv16i32_nxv16i8(<vscale x 16 x i32> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv16i32_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 16 x i32> %x to <vscale x 16 x i8>
@@ -894,8 +894,8 @@ define void @truncstore_nxv16i32_nxv16i16(<vscale x 16 x i32> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
-; CHECK-NEXT:    vs4r.v v16, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs4r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 16 x i32> %x to <vscale x 16 x i16>
   store <vscale x 16 x i16> %y, ptr %z
@@ -948,9 +948,9 @@ define void @truncstore_nxv2i64_nxv2i8(<vscale x 2 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a0)
@@ -964,9 +964,9 @@ define void @truncstore_nxv2i64_nxv2i16(<vscale x 2 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i16>
@@ -978,8 +978,8 @@ define void @truncstore_nxv2i64_nxv2i32(<vscale x 2 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vs1r.v v10, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
   store <vscale x 2 x i32> %y, ptr %z
@@ -990,9 +990,9 @@ define void @truncstore_nxv4i64_nxv4i8(<vscale x 4 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a0)
@@ -1006,9 +1006,9 @@ define void @truncstore_nxv4i64_nxv4i16(<vscale x 4 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i16>
@@ -1020,8 +1020,8 @@ define void @truncstore_nxv4i64_nxv4i32(<vscale x 4 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
-; CHECK-NEXT:    vs2r.v v12, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i32>
   store <vscale x 4 x i32> %y, ptr %z
@@ -1032,12 +1032,12 @@ define void @truncstore_nxv8i64_nxv8i8(<vscale x 8 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vs1r.v v10, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i8>
   store <vscale x 8 x i8> %y, ptr %z
@@ -1048,9 +1048,9 @@ define void @truncstore_nxv8i64_nxv8i16(<vscale x 8 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i16>
@@ -1062,8 +1062,8 @@ define void @truncstore_nxv8i64_nxv8i32(<vscale x 8 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
-; CHECK-NEXT:    vs4r.v v16, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs4r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i32>
   store <vscale x 8 x i32> %y, ptr %z
@@ -1113,9 +1113,9 @@ define <vscale x 2 x double> @extload_nxv2f16_nxv2f64(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %y = load <vscale x 2 x half>, ptr %x
   %z = fpext <vscale x 2 x half> %y to <vscale x 2 x double>
@@ -1125,9 +1125,9 @@ define <vscale x 2 x double> @extload_nxv2f16_nxv2f64(ptr %x) {
 define <vscale x 4 x float> @extload_nxv4f16_nxv4f32(ptr %x) {
 ; CHECK-LABEL: extload_nxv4f16_nxv4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1re16.v v10, (a0)
+; CHECK-NEXT:    vl1re16.v v9, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %y = load <vscale x 4 x half>, ptr %x
   %z = fpext <vscale x 4 x half> %y to <vscale x 4 x float>
@@ -1139,9 +1139,9 @@ define <vscale x 4 x double> @extload_nxv4f16_nxv4f64(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1re16.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %y = load <vscale x 4 x half>, ptr %x
   %z = fpext <vscale x 4 x half> %y to <vscale x 4 x double>
@@ -1151,9 +1151,9 @@ define <vscale x 4 x double> @extload_nxv4f16_nxv4f64(ptr %x) {
 define <vscale x 8 x float> @extload_nxv8f16_nxv8f32(ptr %x) {
 ; CHECK-LABEL: extload_nxv8f16_nxv8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl2re16.v v12, (a0)
+; CHECK-NEXT:    vl2re16.v v10, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x half>, ptr %x
   %z = fpext <vscale x 8 x half> %y to <vscale x 8 x float>
@@ -1165,9 +1165,9 @@ define <vscale x 8 x double> @extload_nxv8f16_nxv8f64(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re16.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x half>, ptr %x
   %z = fpext <vscale x 8 x half> %y to <vscale x 8 x double>
@@ -1177,9 +1177,9 @@ define <vscale x 8 x double> @extload_nxv8f16_nxv8f64(ptr %x) {
 define <vscale x 16 x float> @extload_nxv16f16_nxv16f32(ptr %x) {
 ; CHECK-LABEL: extload_nxv16f16_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl4re16.v v16, (a0)
+; CHECK-NEXT:    vl4re16.v v12, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
 ; CHECK-NEXT:    ret
   %y = load <vscale x 16 x half>, ptr %x
   %z = fpext <vscale x 16 x half> %y to <vscale x 16 x float>
@@ -1190,8 +1190,8 @@ define void @truncstore_nxv1f32_nxv1f16(<vscale x 1 x float> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vse16.v v9, (a0)
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 1 x float> %x to <vscale x 1 x half>
   store <vscale x 1 x half> %y, ptr %z
@@ -1214,8 +1214,8 @@ define void @truncstore_nxv2f32_nxv2f16(<vscale x 2 x float> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vse16.v v9, (a0)
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 2 x float> %x to <vscale x 2 x half>
   store <vscale x 2 x half> %y, ptr %z
@@ -1225,9 +1225,9 @@ define void @truncstore_nxv2f32_nxv2f16(<vscale x 2 x float> %x, ptr %z) {
 define <vscale x 2 x double> @extload_nxv2f32_nxv2f64(ptr %x) {
 ; CHECK-LABEL: extload_nxv2f32_nxv2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1re32.v v10, (a0)
+; CHECK-NEXT:    vl1re32.v v9, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %y = load <vscale x 2 x float>, ptr %x
   %z = fpext <vscale x 2 x float> %y to <vscale x 2 x double>
@@ -1238,8 +1238,8 @@ define void @truncstore_nxv4f32_nxv4f16(<vscale x 4 x float> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
-; CHECK-NEXT:    vs1r.v v10, (a0)
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
+; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 4 x float> %x to <vscale x 4 x half>
   store <vscale x 4 x half> %y, ptr %z
@@ -1249,9 +1249,9 @@ define void @truncstore_nxv4f32_nxv4f16(<vscale x 4 x float> %x, ptr %z) {
 define <vscale x 4 x double> @extload_nxv4f32_nxv4f64(ptr %x) {
 ; CHECK-LABEL: extload_nxv4f32_nxv4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl2re32.v v12, (a0)
+; CHECK-NEXT:    vl2re32.v v10, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %y = load <vscale x 4 x float>, ptr %x
   %z = fpext <vscale x 4 x float> %y to <vscale x 4 x double>
@@ -1262,8 +1262,8 @@ define void @truncstore_nxv8f32_nxv8f16(<vscale x 8 x float> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v12, v8
-; CHECK-NEXT:    vs2r.v v12, (a0)
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
+; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 8 x float> %x to <vscale x 8 x half>
   store <vscale x 8 x half> %y, ptr %z
@@ -1273,9 +1273,9 @@ define void @truncstore_nxv8f32_nxv8f16(<vscale x 8 x float> %x, ptr %z) {
 define <vscale x 8 x double> @extload_nxv8f32_nxv8f64(ptr %x) {
 ; CHECK-LABEL: extload_nxv8f32_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl4re32.v v16, (a0)
+; CHECK-NEXT:    vl4re32.v v12, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
 ; CHECK-NEXT:    ret
   %y = load <vscale x 8 x float>, ptr %x
   %z = fpext <vscale x 8 x float> %y to <vscale x 8 x double>
@@ -1286,8 +1286,8 @@ define void @truncstore_nxv16f32_nxv16f16(<vscale x 16 x float> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv16f32_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v16, v8
-; CHECK-NEXT:    vs4r.v v16, (a0)
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
+; CHECK-NEXT:    vs4r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 16 x float> %x to <vscale x 16 x half>
   store <vscale x 16 x half> %y, ptr %z
@@ -1298,9 +1298,9 @@ define void @truncstore_nxv1f64_nxv1f16(<vscale x 1 x double> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv1f64_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 1 x double> %x to <vscale x 1 x half>
@@ -1312,8 +1312,8 @@ define void @truncstore_nxv1f64_nxv1f32(<vscale x 1 x double> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vse32.v v9, (a0)
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
+; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 1 x double> %x to <vscale x 1 x float>
   store <vscale x 1 x float> %y, ptr %z
@@ -1324,9 +1324,9 @@ define void @truncstore_nxv2f64_nxv2f16(<vscale x 2 x double> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv2f64_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 2 x double> %x to <vscale x 2 x half>
@@ -1338,8 +1338,8 @@ define void @truncstore_nxv2f64_nxv2f32(<vscale x 2 x double> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
-; CHECK-NEXT:    vs1r.v v10, (a0)
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
+; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float>
   store <vscale x 2 x float> %y, ptr %z
@@ -1350,9 +1350,9 @@ define void @truncstore_nxv4f64_nxv4f16(<vscale x 4 x double> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv4f64_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 4 x double> %x to <vscale x 4 x half>
@@ -1364,8 +1364,8 @@ define void @truncstore_nxv4f64_nxv4f32(<vscale x 4 x double> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v12, v8
-; CHECK-NEXT:    vs2r.v v12, (a0)
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
+; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 4 x double> %x to <vscale x 4 x float>
   store <vscale x 4 x float> %y, ptr %z
@@ -1376,9 +1376,9 @@ define void @truncstore_nxv8f64_nxv8f16(<vscale x 8 x double> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv8f64_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v16
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 8 x double> %x to <vscale x 8 x half>
@@ -1390,8 +1390,8 @@ define void @truncstore_nxv8f64_nxv8f32(<vscale x 8 x double> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v16, v8
-; CHECK-NEXT:    vs4r.v v16, (a0)
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
+; CHECK-NEXT:    vs4r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = fptrunc <vscale x 8 x double> %x to <vscale x 8 x float>
   store <vscale x 8 x float> %y, ptr %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
index 796f8dde58f47..2be287f0478ca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -140,16 +140,16 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT:    sub sp, sp, a3
 ; RV32-NEXT:    andi sp, sp, -64
 ; RV32-NEXT:    addi a3, sp, 64
-; RV32-NEXT:    vl8r.v v8, (a0)
+; RV32-NEXT:    vl8r.v v16, (a0)
 ; RV32-NEXT:    slli a2, a2, 3
 ; RV32-NEXT:    add a0, a0, a2
-; RV32-NEXT:    vl8r.v v24, (a0)
+; RV32-NEXT:    vl8r.v v8, (a0)
 ; RV32-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; RV32-NEXT:    vmseq.vi v0, v8, 0
+; RV32-NEXT:    vmseq.vi v0, v16, 0
 ; RV32-NEXT:    vmv.v.i v16, 0
 ; RV32-NEXT:    add a1, a3, a1
 ; RV32-NEXT:    add a2, a3, a2
-; RV32-NEXT:    vmseq.vi v8, v24, 0
+; RV32-NEXT:    vmseq.vi v8, v8, 0
 ; RV32-NEXT:    vmerge.vim v24, v16, 1, v0
 ; RV32-NEXT:    vs8r.v v24, (a3)
 ; RV32-NEXT:    vmv1r.v v0, v8
@@ -180,16 +180,16 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT:    sub sp, sp, a3
 ; RV64-NEXT:    andi sp, sp, -64
 ; RV64-NEXT:    addi a3, sp, 64
-; RV64-NEXT:    vl8r.v v8, (a0)
+; RV64-NEXT:    vl8r.v v16, (a0)
 ; RV64-NEXT:    slli a2, a2, 3
 ; RV64-NEXT:    add a0, a0, a2
-; RV64-NEXT:    vl8r.v v24, (a0)
+; RV64-NEXT:    vl8r.v v8, (a0)
 ; RV64-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; RV64-NEXT:    vmseq.vi v0, v8, 0
+; RV64-NEXT:    vmseq.vi v0, v16, 0
 ; RV64-NEXT:    vmv.v.i v16, 0
 ; RV64-NEXT:    add a1, a3, a1
 ; RV64-NEXT:    add a2, a3, a2
-; RV64-NEXT:    vmseq.vi v8, v24, 0
+; RV64-NEXT:    vmseq.vi v8, v8, 0
 ; RV64-NEXT:    vmerge.vim v24, v16, 1, v0
 ; RV64-NEXT:    vs8r.v v24, (a3)
 ; RV64-NEXT:    vmv1r.v v0, v8
@@ -272,8 +272,8 @@ define i1 @extractelt_nxv16i1_idx0(ptr %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmseq.vi v10, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v10
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 16 x i8>, ptr %x
@@ -287,8 +287,8 @@ define i1 @extractelt_nxv32i1_idx0(ptr %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmseq.vi v12, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v12
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 32 x i8>, ptr %x
@@ -302,8 +302,8 @@ define i1 @extractelt_nxv64i1_idx0(ptr %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vi v16, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v16
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <vscale x 64 x i8>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
index 2356237d790b6..7851ffced708d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
@@ -189,9 +189,9 @@ define void @abs_v4i64_of_sext_v4i32(ptr %x) {
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vrsub.vi v9, v8, 0
-; CHECK-NEXT:    vmax.vv v10, v8, v9
+; CHECK-NEXT:    vmax.vv v9, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    vse64.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
index 6b5ca5f391b80..3bbbc42575ad3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
@@ -128,8 +128,8 @@ define <32 x i1> @v32i1(i1 %x, i1 %y) {
 ; CHECK-NEXT:    vmv.v.x v8, a0
 ; CHECK-NEXT:    vmsne.vi v10, v8, 0
 ; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vmsne.vi v11, v8, 0
-; CHECK-NEXT:    vmxor.mm v0, v10, v11
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
+; CHECK-NEXT:    vmxor.mm v0, v10, v8
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    vrgather.vi v10, v8, 0
@@ -153,8 +153,8 @@ define <64 x i1> @v64i1(i1 %x, i1 %y) {
 ; CHECK-NEXT:    vmv.v.x v8, a0
 ; CHECK-NEXT:    vmsne.vi v12, v8, 0
 ; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vmsne.vi v13, v8, 0
-; CHECK-NEXT:    vmxor.mm v0, v12, v13
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
+; CHECK-NEXT:    vmxor.mm v0, v12, v8
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    vrgather.vi v12, v8, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
index c6ff39ad10d6b..c5589a4002c22 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
@@ -271,21 +271,19 @@ declare <16 x half> @llvm.vp.ceil.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_ceil_v16f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 3
-; ZVFH-NEXT:    vmv1r.v v0, v10
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v16f16:
@@ -444,20 +442,18 @@ define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl
 ; CHECK-LABEL: vp_ceil_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -488,20 +484,18 @@ define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vp_ceil_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -573,21 +567,19 @@ declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_v4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -617,21 +609,19 @@ declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_v8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -661,21 +651,19 @@ declare <15 x double> @llvm.vp.ceil.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_v15f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -705,21 +693,19 @@ declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_v16f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -749,16 +735,14 @@ declare <32 x double> @llvm.vp.ceil.v32f64(<32 x double>, <32 x i1>, i32)
 define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_v32f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv1r.v v6, v0
 ; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v7, v0, 2
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    bltu a0, a2, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v8, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
@@ -768,9 +752,8 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a1, 3
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
 ; CHECK-NEXT:    fsrm a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index 61730b87c5517..495aae0af8019 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -40,11 +40,11 @@ define void @ctlz_v16i8(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RVF-NEXT:    vle8.v v8, (a0)
 ; RVF-NEXT:    li a1, 134
-; RVF-NEXT:    vzext.vf2 v12, v8
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVF-NEXT:    vnsrl.wi v12, v8, 23
+; RVF-NEXT:    vzext.vf2 v10, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
 ; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; RVF-NEXT:    vnsrl.wi v8, v12, 0
+; RVF-NEXT:    vnsrl.wi v8, v8, 0
 ; RVF-NEXT:    vrsub.vx v8, v8, a1
 ; RVF-NEXT:    li a1, 8
 ; RVF-NEXT:    vminu.vx v8, v8, a1
@@ -56,11 +56,11 @@ define void @ctlz_v16i8(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RVD-NEXT:    vle8.v v8, (a0)
 ; RVD-NEXT:    li a1, 134
-; RVD-NEXT:    vzext.vf2 v12, v8
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVD-NEXT:    vnsrl.wi v12, v8, 23
+; RVD-NEXT:    vzext.vf2 v10, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
 ; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; RVD-NEXT:    vnsrl.wi v8, v12, 0
+; RVD-NEXT:    vnsrl.wi v8, v8, 0
 ; RVD-NEXT:    vrsub.vx v8, v8, a1
 ; RVD-NEXT:    li a1, 8
 ; RVD-NEXT:    vminu.vx v8, v8, a1
@@ -121,11 +121,11 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
 ; RVF-LABEL: ctlz_v8i16:
 ; RVF:       # %bb.0:
 ; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RVF-NEXT:    vle16.v v10, (a0)
+; RVF-NEXT:    vle16.v v9, (a0)
 ; RVF-NEXT:    li a1, 142
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v10
-; RVF-NEXT:    vnsrl.wi v10, v8, 23
-; RVF-NEXT:    vrsub.vx v8, v10, a1
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v9
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
+; RVF-NEXT:    vrsub.vx v8, v8, a1
 ; RVF-NEXT:    li a1, 16
 ; RVF-NEXT:    vminu.vx v8, v8, a1
 ; RVF-NEXT:    vse16.v v8, (a0)
@@ -134,11 +134,11 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
 ; RVD-LABEL: ctlz_v8i16:
 ; RVD:       # %bb.0:
 ; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RVD-NEXT:    vle16.v v10, (a0)
+; RVD-NEXT:    vle16.v v9, (a0)
 ; RVD-NEXT:    li a1, 142
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
-; RVD-NEXT:    vnsrl.wi v10, v8, 23
-; RVD-NEXT:    vrsub.vx v8, v10, a1
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v9
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
+; RVD-NEXT:    vrsub.vx v8, v8, a1
 ; RVD-NEXT:    li a1, 16
 ; RVD-NEXT:    vminu.vx v8, v8, a1
 ; RVD-NEXT:    vse16.v v8, (a0)
@@ -216,12 +216,12 @@ define void @ctlz_v4i32(ptr %x, ptr %y) nounwind {
 ; RVD-LABEL: ctlz_v4i32:
 ; RVD:       # %bb.0:
 ; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RVD-NEXT:    vle32.v v10, (a0)
+; RVD-NEXT:    vle32.v v9, (a0)
 ; RVD-NEXT:    li a1, 52
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
-; RVD-NEXT:    vnsrl.wx v10, v8, a1
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v9
+; RVD-NEXT:    vnsrl.wx v8, v8, a1
 ; RVD-NEXT:    li a1, 1054
-; RVD-NEXT:    vrsub.vx v8, v10, a1
+; RVD-NEXT:    vrsub.vx v8, v8, a1
 ; RVD-NEXT:    li a1, 32
 ; RVD-NEXT:    vminu.vx v8, v8, a1
 ; RVD-NEXT:    vse32.v v8, (a0)
@@ -354,9 +354,9 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    li a1, 190
 ; RVF-NEXT:    vmv.v.x v9, a1
 ; RVF-NEXT:    fsrmi a1, 1
-; RVF-NEXT:    vfncvt.f.xu.w v10, v8
+; RVF-NEXT:    vfncvt.f.xu.w v8, v8
 ; RVF-NEXT:    fsrm a1
-; RVF-NEXT:    vsrl.vi v8, v10, 23
+; RVF-NEXT:    vsrl.vi v8, v8, 23
 ; RVF-NEXT:    vwsubu.vv v10, v9, v8
 ; RVF-NEXT:    li a1, 64
 ; RVF-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
@@ -429,11 +429,11 @@ define void @ctlz_v32i8(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; RVF-NEXT:    vle8.v v8, (a0)
 ; RVF-NEXT:    li a1, 134
-; RVF-NEXT:    vzext.vf2 v16, v8
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v16
-; RVF-NEXT:    vnsrl.wi v16, v8, 23
+; RVF-NEXT:    vzext.vf2 v12, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v12
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
 ; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; RVF-NEXT:    vnsrl.wi v8, v16, 0
+; RVF-NEXT:    vnsrl.wi v8, v8, 0
 ; RVF-NEXT:    vrsub.vx v8, v8, a1
 ; RVF-NEXT:    li a1, 8
 ; RVF-NEXT:    vminu.vx v8, v8, a1
@@ -446,11 +446,11 @@ define void @ctlz_v32i8(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; RVD-NEXT:    vle8.v v8, (a0)
 ; RVD-NEXT:    li a1, 134
-; RVD-NEXT:    vzext.vf2 v16, v8
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v16
-; RVD-NEXT:    vnsrl.wi v16, v8, 23
+; RVD-NEXT:    vzext.vf2 v12, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
 ; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; RVD-NEXT:    vnsrl.wi v8, v16, 0
+; RVD-NEXT:    vnsrl.wi v8, v8, 0
 ; RVD-NEXT:    vrsub.vx v8, v8, a1
 ; RVD-NEXT:    li a1, 8
 ; RVD-NEXT:    vminu.vx v8, v8, a1
@@ -512,11 +512,11 @@ define void @ctlz_v16i16(ptr %x, ptr %y) nounwind {
 ; RVF-LABEL: ctlz_v16i16:
 ; RVF:       # %bb.0:
 ; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; RVF-NEXT:    vle16.v v12, (a0)
+; RVF-NEXT:    vle16.v v10, (a0)
 ; RVF-NEXT:    li a1, 142
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVF-NEXT:    vnsrl.wi v12, v8, 23
-; RVF-NEXT:    vrsub.vx v8, v12, a1
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
+; RVF-NEXT:    vrsub.vx v8, v8, a1
 ; RVF-NEXT:    li a1, 16
 ; RVF-NEXT:    vminu.vx v8, v8, a1
 ; RVF-NEXT:    vse16.v v8, (a0)
@@ -525,11 +525,11 @@ define void @ctlz_v16i16(ptr %x, ptr %y) nounwind {
 ; RVD-LABEL: ctlz_v16i16:
 ; RVD:       # %bb.0:
 ; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; RVD-NEXT:    vle16.v v12, (a0)
+; RVD-NEXT:    vle16.v v10, (a0)
 ; RVD-NEXT:    li a1, 142
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVD-NEXT:    vnsrl.wi v12, v8, 23
-; RVD-NEXT:    vrsub.vx v8, v12, a1
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
+; RVD-NEXT:    vrsub.vx v8, v8, a1
 ; RVD-NEXT:    li a1, 16
 ; RVD-NEXT:    vminu.vx v8, v8, a1
 ; RVD-NEXT:    vse16.v v8, (a0)
@@ -607,12 +607,12 @@ define void @ctlz_v8i32(ptr %x, ptr %y) nounwind {
 ; RVD-LABEL: ctlz_v8i32:
 ; RVD:       # %bb.0:
 ; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RVD-NEXT:    vle32.v v12, (a0)
+; RVD-NEXT:    vle32.v v10, (a0)
 ; RVD-NEXT:    li a1, 52
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVD-NEXT:    vnsrl.wx v12, v8, a1
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVD-NEXT:    vnsrl.wx v8, v8, a1
 ; RVD-NEXT:    li a1, 1054
-; RVD-NEXT:    vrsub.vx v8, v12, a1
+; RVD-NEXT:    vrsub.vx v8, v8, a1
 ; RVD-NEXT:    li a1, 32
 ; RVD-NEXT:    vminu.vx v8, v8, a1
 ; RVD-NEXT:    vse32.v v8, (a0)
@@ -745,10 +745,10 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    li a1, 190
 ; RVF-NEXT:    vmv.v.x v10, a1
 ; RVF-NEXT:    fsrmi a1, 1
-; RVF-NEXT:    vfncvt.f.xu.w v11, v8
+; RVF-NEXT:    vfncvt.f.xu.w v8, v8
 ; RVF-NEXT:    fsrm a1
-; RVF-NEXT:    vsrl.vi v11, v11, 23
-; RVF-NEXT:    vwsubu.vv v8, v10, v11
+; RVF-NEXT:    vsrl.vi v9, v8, 23
+; RVF-NEXT:    vwsubu.vv v8, v10, v9
 ; RVF-NEXT:    li a1, 64
 ; RVF-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RVF-NEXT:    vminu.vx v8, v8, a1
@@ -817,11 +817,11 @@ define void @ctlz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
 ; RVF:       # %bb.0:
 ; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RVF-NEXT:    vle8.v v8, (a0)
-; RVF-NEXT:    vzext.vf2 v12, v8
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVF-NEXT:    vnsrl.wi v12, v8, 23
+; RVF-NEXT:    vzext.vf2 v10, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
 ; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; RVF-NEXT:    vnsrl.wi v8, v12, 0
+; RVF-NEXT:    vnsrl.wi v8, v8, 0
 ; RVF-NEXT:    li a1, 134
 ; RVF-NEXT:    vrsub.vx v8, v8, a1
 ; RVF-NEXT:    vse8.v v8, (a0)
@@ -831,11 +831,11 @@ define void @ctlz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
 ; RVD:       # %bb.0:
 ; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RVD-NEXT:    vle8.v v8, (a0)
-; RVD-NEXT:    vzext.vf2 v12, v8
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVD-NEXT:    vnsrl.wi v12, v8, 23
+; RVD-NEXT:    vzext.vf2 v10, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
 ; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; RVD-NEXT:    vnsrl.wi v8, v12, 0
+; RVD-NEXT:    vnsrl.wi v8, v8, 0
 ; RVD-NEXT:    li a1, 134
 ; RVD-NEXT:    vrsub.vx v8, v8, a1
 ; RVD-NEXT:    vse8.v v8, (a0)
@@ -894,22 +894,22 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
 ; RVF-LABEL: ctlz_zero_undef_v8i16:
 ; RVF:       # %bb.0:
 ; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RVF-NEXT:    vle16.v v10, (a0)
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v10
-; RVF-NEXT:    vnsrl.wi v10, v8, 23
+; RVF-NEXT:    vle16.v v9, (a0)
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v9
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
 ; RVF-NEXT:    li a1, 142
-; RVF-NEXT:    vrsub.vx v8, v10, a1
+; RVF-NEXT:    vrsub.vx v8, v8, a1
 ; RVF-NEXT:    vse16.v v8, (a0)
 ; RVF-NEXT:    ret
 ;
 ; RVD-LABEL: ctlz_zero_undef_v8i16:
 ; RVD:       # %bb.0:
 ; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RVD-NEXT:    vle16.v v10, (a0)
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
-; RVD-NEXT:    vnsrl.wi v10, v8, 23
+; RVD-NEXT:    vle16.v v9, (a0)
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v9
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
 ; RVD-NEXT:    li a1, 142
-; RVD-NEXT:    vrsub.vx v8, v10, a1
+; RVD-NEXT:    vrsub.vx v8, v8, a1
 ; RVD-NEXT:    vse16.v v8, (a0)
 ; RVD-NEXT:    ret
 ;
@@ -982,12 +982,12 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
 ; RVD-LABEL: ctlz_zero_undef_v4i32:
 ; RVD:       # %bb.0:
 ; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RVD-NEXT:    vle32.v v10, (a0)
+; RVD-NEXT:    vle32.v v9, (a0)
 ; RVD-NEXT:    li a1, 52
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
-; RVD-NEXT:    vnsrl.wx v10, v8, a1
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v9
+; RVD-NEXT:    vnsrl.wx v8, v8, a1
 ; RVD-NEXT:    li a1, 1054
-; RVD-NEXT:    vrsub.vx v8, v10, a1
+; RVD-NEXT:    vrsub.vx v8, v8, a1
 ; RVD-NEXT:    vse32.v v8, (a0)
 ; RVD-NEXT:    ret
 ;
@@ -1117,9 +1117,9 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    li a1, 190
 ; RVF-NEXT:    vmv.v.x v9, a1
 ; RVF-NEXT:    fsrmi a1, 1
-; RVF-NEXT:    vfncvt.f.xu.w v10, v8
+; RVF-NEXT:    vfncvt.f.xu.w v8, v8
 ; RVF-NEXT:    fsrm a1
-; RVF-NEXT:    vsrl.vi v8, v10, 23
+; RVF-NEXT:    vsrl.vi v8, v8, 23
 ; RVF-NEXT:    vwsubu.vv v10, v9, v8
 ; RVF-NEXT:    vse64.v v10, (a0)
 ; RVF-NEXT:    ret
@@ -1185,11 +1185,11 @@ define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    li a1, 32
 ; RVF-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; RVF-NEXT:    vle8.v v8, (a0)
-; RVF-NEXT:    vzext.vf2 v16, v8
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v16
-; RVF-NEXT:    vnsrl.wi v16, v8, 23
+; RVF-NEXT:    vzext.vf2 v12, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v12
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
 ; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; RVF-NEXT:    vnsrl.wi v8, v16, 0
+; RVF-NEXT:    vnsrl.wi v8, v8, 0
 ; RVF-NEXT:    li a1, 134
 ; RVF-NEXT:    vrsub.vx v8, v8, a1
 ; RVF-NEXT:    vse8.v v8, (a0)
@@ -1200,11 +1200,11 @@ define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    li a1, 32
 ; RVD-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; RVD-NEXT:    vle8.v v8, (a0)
-; RVD-NEXT:    vzext.vf2 v16, v8
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v16
-; RVD-NEXT:    vnsrl.wi v16, v8, 23
+; RVD-NEXT:    vzext.vf2 v12, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
 ; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; RVD-NEXT:    vnsrl.wi v8, v16, 0
+; RVD-NEXT:    vnsrl.wi v8, v8, 0
 ; RVD-NEXT:    li a1, 134
 ; RVD-NEXT:    vrsub.vx v8, v8, a1
 ; RVD-NEXT:    vse8.v v8, (a0)
@@ -1264,22 +1264,22 @@ define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
 ; RVF-LABEL: ctlz_zero_undef_v16i16:
 ; RVF:       # %bb.0:
 ; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; RVF-NEXT:    vle16.v v12, (a0)
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVF-NEXT:    vnsrl.wi v12, v8, 23
+; RVF-NEXT:    vle16.v v10, (a0)
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
 ; RVF-NEXT:    li a1, 142
-; RVF-NEXT:    vrsub.vx v8, v12, a1
+; RVF-NEXT:    vrsub.vx v8, v8, a1
 ; RVF-NEXT:    vse16.v v8, (a0)
 ; RVF-NEXT:    ret
 ;
 ; RVD-LABEL: ctlz_zero_undef_v16i16:
 ; RVD:       # %bb.0:
 ; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; RVD-NEXT:    vle16.v v12, (a0)
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVD-NEXT:    vnsrl.wi v12, v8, 23
+; RVD-NEXT:    vle16.v v10, (a0)
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
 ; RVD-NEXT:    li a1, 142
-; RVD-NEXT:    vrsub.vx v8, v12, a1
+; RVD-NEXT:    vrsub.vx v8, v8, a1
 ; RVD-NEXT:    vse16.v v8, (a0)
 ; RVD-NEXT:    ret
 ;
@@ -1352,12 +1352,12 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
 ; RVD-LABEL: ctlz_zero_undef_v8i32:
 ; RVD:       # %bb.0:
 ; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RVD-NEXT:    vle32.v v12, (a0)
+; RVD-NEXT:    vle32.v v10, (a0)
 ; RVD-NEXT:    li a1, 52
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVD-NEXT:    vnsrl.wx v12, v8, a1
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVD-NEXT:    vnsrl.wx v8, v8, a1
 ; RVD-NEXT:    li a1, 1054
-; RVD-NEXT:    vrsub.vx v8, v12, a1
+; RVD-NEXT:    vrsub.vx v8, v8, a1
 ; RVD-NEXT:    vse32.v v8, (a0)
 ; RVD-NEXT:    ret
 ;
@@ -1487,10 +1487,10 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    li a1, 190
 ; RVF-NEXT:    vmv.v.x v10, a1
 ; RVF-NEXT:    fsrmi a1, 1
-; RVF-NEXT:    vfncvt.f.xu.w v11, v8
+; RVF-NEXT:    vfncvt.f.xu.w v8, v8
 ; RVF-NEXT:    fsrm a1
-; RVF-NEXT:    vsrl.vi v11, v11, 23
-; RVF-NEXT:    vwsubu.vv v8, v10, v11
+; RVF-NEXT:    vsrl.vi v9, v8, 23
+; RVF-NEXT:    vwsubu.vv v8, v10, v9
 ; RVF-NEXT:    vse64.v v8, (a0)
 ; RVF-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 307b143f4449f..1a4714348dc5f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -34,17 +34,17 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
 ; RVF-LABEL: cttz_v16i8:
 ; RVF:       # %bb.0:
 ; RVF-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RVF-NEXT:    vle8.v v14, (a0)
+; RVF-NEXT:    vle8.v v12, (a0)
 ; RVF-NEXT:    li a1, 127
-; RVF-NEXT:    vrsub.vi v8, v14, 0
-; RVF-NEXT:    vand.vv v8, v14, v8
+; RVF-NEXT:    vrsub.vi v8, v12, 0
+; RVF-NEXT:    vand.vv v8, v12, v8
 ; RVF-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RVF-NEXT:    vzext.vf2 v12, v8
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVF-NEXT:    vnsrl.wi v12, v8, 23
+; RVF-NEXT:    vzext.vf2 v10, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
 ; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; RVF-NEXT:    vnsrl.wi v8, v12, 0
-; RVF-NEXT:    vmseq.vi v0, v14, 0
+; RVF-NEXT:    vnsrl.wi v8, v8, 0
+; RVF-NEXT:    vmseq.vi v0, v12, 0
 ; RVF-NEXT:    vsub.vx v8, v8, a1
 ; RVF-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RVF-NEXT:    vse8.v v8, (a0)
@@ -53,17 +53,17 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
 ; RVD-LABEL: cttz_v16i8:
 ; RVD:       # %bb.0:
 ; RVD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RVD-NEXT:    vle8.v v14, (a0)
+; RVD-NEXT:    vle8.v v12, (a0)
 ; RVD-NEXT:    li a1, 127
-; RVD-NEXT:    vrsub.vi v8, v14, 0
-; RVD-NEXT:    vand.vv v8, v14, v8
+; RVD-NEXT:    vrsub.vi v8, v12, 0
+; RVD-NEXT:    vand.vv v8, v12, v8
 ; RVD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RVD-NEXT:    vzext.vf2 v12, v8
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVD-NEXT:    vnsrl.wi v12, v8, 23
+; RVD-NEXT:    vzext.vf2 v10, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
 ; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; RVD-NEXT:    vnsrl.wi v8, v12, 0
-; RVD-NEXT:    vmseq.vi v0, v14, 0
+; RVD-NEXT:    vnsrl.wi v8, v8, 0
+; RVD-NEXT:    vmseq.vi v0, v12, 0
 ; RVD-NEXT:    vsub.vx v8, v8, a1
 ; RVD-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RVD-NEXT:    vse8.v v8, (a0)
@@ -121,10 +121,10 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    li a1, 127
 ; RVF-NEXT:    vrsub.vi v9, v8, 0
 ; RVF-NEXT:    vmseq.vi v0, v8, 0
-; RVF-NEXT:    vand.vv v10, v8, v9
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v10
-; RVF-NEXT:    vnsrl.wi v10, v8, 23
-; RVF-NEXT:    vsub.vx v8, v10, a1
+; RVF-NEXT:    vand.vv v9, v8, v9
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v9
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
+; RVF-NEXT:    vsub.vx v8, v8, a1
 ; RVF-NEXT:    li a1, 16
 ; RVF-NEXT:    vmerge.vxm v8, v8, a1, v0
 ; RVF-NEXT:    vse16.v v8, (a0)
@@ -137,10 +137,10 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    li a1, 127
 ; RVD-NEXT:    vrsub.vi v9, v8, 0
 ; RVD-NEXT:    vmseq.vi v0, v8, 0
-; RVD-NEXT:    vand.vv v10, v8, v9
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
-; RVD-NEXT:    vnsrl.wi v10, v8, 23
-; RVD-NEXT:    vsub.vx v8, v10, a1
+; RVD-NEXT:    vand.vv v9, v8, v9
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v9
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
+; RVD-NEXT:    vsub.vx v8, v8, a1
 ; RVD-NEXT:    li a1, 16
 ; RVD-NEXT:    vmerge.vxm v8, v8, a1, v0
 ; RVD-NEXT:    vse16.v v8, (a0)
@@ -216,12 +216,12 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    vle32.v v10, (a0)
 ; RVD-NEXT:    li a1, 52
 ; RVD-NEXT:    vrsub.vi v8, v10, 0
-; RVD-NEXT:    vand.vv v11, v10, v8
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v11
-; RVD-NEXT:    vnsrl.wx v11, v8, a1
+; RVD-NEXT:    vand.vv v9, v10, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v9
+; RVD-NEXT:    vnsrl.wx v8, v8, a1
 ; RVD-NEXT:    li a1, 1023
 ; RVD-NEXT:    vmseq.vi v0, v10, 0
-; RVD-NEXT:    vsub.vx v8, v11, a1
+; RVD-NEXT:    vsub.vx v8, v8, a1
 ; RVD-NEXT:    li a1, 32
 ; RVD-NEXT:    vmerge.vxm v8, v8, a1, v0
 ; RVD-NEXT:    vse32.v v8, (a0)
@@ -333,13 +333,13 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    vrsub.vi v9, v8, 0
 ; RVF-NEXT:    vand.vv v9, v8, v9
 ; RVF-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RVF-NEXT:    vfncvt.f.xu.w v10, v9
+; RVF-NEXT:    vfncvt.f.xu.w v9, v9
 ; RVF-NEXT:    fsrm a1
 ; RVF-NEXT:    li a1, 127
 ; RVF-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
 ; RVF-NEXT:    vmseq.vi v0, v8, 0
 ; RVF-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RVF-NEXT:    vsrl.vi v8, v10, 23
+; RVF-NEXT:    vsrl.vi v8, v9, 23
 ; RVF-NEXT:    vwsubu.vx v9, v8, a1
 ; RVF-NEXT:    li a1, 64
 ; RVF-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
@@ -409,17 +409,17 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
 ; RVF:       # %bb.0:
 ; RVF-NEXT:    li a1, 32
 ; RVF-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RVF-NEXT:    vle8.v v20, (a0)
+; RVF-NEXT:    vle8.v v16, (a0)
 ; RVF-NEXT:    li a1, 127
-; RVF-NEXT:    vrsub.vi v8, v20, 0
-; RVF-NEXT:    vand.vv v8, v20, v8
+; RVF-NEXT:    vrsub.vi v8, v16, 0
+; RVF-NEXT:    vand.vv v8, v16, v8
 ; RVF-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; RVF-NEXT:    vzext.vf2 v16, v8
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v16
-; RVF-NEXT:    vnsrl.wi v16, v8, 23
+; RVF-NEXT:    vzext.vf2 v12, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v12
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
 ; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; RVF-NEXT:    vnsrl.wi v8, v16, 0
-; RVF-NEXT:    vmseq.vi v0, v20, 0
+; RVF-NEXT:    vnsrl.wi v8, v8, 0
+; RVF-NEXT:    vmseq.vi v0, v16, 0
 ; RVF-NEXT:    vsub.vx v8, v8, a1
 ; RVF-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RVF-NEXT:    vse8.v v8, (a0)
@@ -429,17 +429,17 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
 ; RVD:       # %bb.0:
 ; RVD-NEXT:    li a1, 32
 ; RVD-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RVD-NEXT:    vle8.v v20, (a0)
+; RVD-NEXT:    vle8.v v16, (a0)
 ; RVD-NEXT:    li a1, 127
-; RVD-NEXT:    vrsub.vi v8, v20, 0
-; RVD-NEXT:    vand.vv v8, v20, v8
+; RVD-NEXT:    vrsub.vi v8, v16, 0
+; RVD-NEXT:    vand.vv v8, v16, v8
 ; RVD-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; RVD-NEXT:    vzext.vf2 v16, v8
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v16
-; RVD-NEXT:    vnsrl.wi v16, v8, 23
+; RVD-NEXT:    vzext.vf2 v12, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
 ; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; RVD-NEXT:    vnsrl.wi v8, v16, 0
-; RVD-NEXT:    vmseq.vi v0, v20, 0
+; RVD-NEXT:    vnsrl.wi v8, v8, 0
+; RVD-NEXT:    vmseq.vi v0, v16, 0
 ; RVD-NEXT:    vsub.vx v8, v8, a1
 ; RVD-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RVD-NEXT:    vse8.v v8, (a0)
@@ -498,10 +498,10 @@ define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    li a1, 127
 ; RVF-NEXT:    vrsub.vi v10, v8, 0
 ; RVF-NEXT:    vmseq.vi v0, v8, 0
-; RVF-NEXT:    vand.vv v12, v8, v10
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVF-NEXT:    vnsrl.wi v12, v8, 23
-; RVF-NEXT:    vsub.vx v8, v12, a1
+; RVF-NEXT:    vand.vv v10, v8, v10
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
+; RVF-NEXT:    vsub.vx v8, v8, a1
 ; RVF-NEXT:    li a1, 16
 ; RVF-NEXT:    vmerge.vxm v8, v8, a1, v0
 ; RVF-NEXT:    vse16.v v8, (a0)
@@ -514,10 +514,10 @@ define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    li a1, 127
 ; RVD-NEXT:    vrsub.vi v10, v8, 0
 ; RVD-NEXT:    vmseq.vi v0, v8, 0
-; RVD-NEXT:    vand.vv v12, v8, v10
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVD-NEXT:    vnsrl.wi v12, v8, 23
-; RVD-NEXT:    vsub.vx v8, v12, a1
+; RVD-NEXT:    vand.vv v10, v8, v10
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
+; RVD-NEXT:    vsub.vx v8, v8, a1
 ; RVD-NEXT:    li a1, 16
 ; RVD-NEXT:    vmerge.vxm v8, v8, a1, v0
 ; RVD-NEXT:    vse16.v v8, (a0)
@@ -593,12 +593,12 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    vle32.v v12, (a0)
 ; RVD-NEXT:    li a1, 52
 ; RVD-NEXT:    vrsub.vi v8, v12, 0
-; RVD-NEXT:    vand.vv v14, v12, v8
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v14
-; RVD-NEXT:    vnsrl.wx v14, v8, a1
+; RVD-NEXT:    vand.vv v10, v12, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVD-NEXT:    vnsrl.wx v8, v8, a1
 ; RVD-NEXT:    li a1, 1023
 ; RVD-NEXT:    vmseq.vi v0, v12, 0
-; RVD-NEXT:    vsub.vx v8, v14, a1
+; RVD-NEXT:    vsub.vx v8, v8, a1
 ; RVD-NEXT:    li a1, 32
 ; RVD-NEXT:    vmerge.vxm v8, v8, a1, v0
 ; RVD-NEXT:    vse32.v v8, (a0)
@@ -710,14 +710,14 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    vrsub.vi v10, v8, 0
 ; RVF-NEXT:    vand.vv v10, v8, v10
 ; RVF-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RVF-NEXT:    vfncvt.f.xu.w v12, v10
+; RVF-NEXT:    vfncvt.f.xu.w v10, v10
 ; RVF-NEXT:    fsrm a1
 ; RVF-NEXT:    li a1, 127
 ; RVF-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RVF-NEXT:    vmseq.vi v0, v8, 0
 ; RVF-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RVF-NEXT:    vsrl.vi v10, v12, 23
-; RVF-NEXT:    vwsubu.vx v8, v10, a1
+; RVF-NEXT:    vsrl.vi v9, v10, 23
+; RVF-NEXT:    vwsubu.vx v8, v9, a1
 ; RVF-NEXT:    li a1, 64
 ; RVF-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RVF-NEXT:    vmerge.vxm v8, v8, a1, v0
@@ -788,11 +788,11 @@ define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    vrsub.vi v9, v8, 0
 ; RVF-NEXT:    vand.vv v8, v8, v9
 ; RVF-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RVF-NEXT:    vzext.vf2 v12, v8
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVF-NEXT:    vnsrl.wi v12, v8, 23
+; RVF-NEXT:    vzext.vf2 v10, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
 ; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; RVF-NEXT:    vnsrl.wi v8, v12, 0
+; RVF-NEXT:    vnsrl.wi v8, v8, 0
 ; RVF-NEXT:    li a1, 127
 ; RVF-NEXT:    vsub.vx v8, v8, a1
 ; RVF-NEXT:    vse8.v v8, (a0)
@@ -805,11 +805,11 @@ define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    vrsub.vi v9, v8, 0
 ; RVD-NEXT:    vand.vv v8, v8, v9
 ; RVD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RVD-NEXT:    vzext.vf2 v12, v8
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVD-NEXT:    vnsrl.wi v12, v8, 23
+; RVD-NEXT:    vzext.vf2 v10, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
 ; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; RVD-NEXT:    vnsrl.wi v8, v12, 0
+; RVD-NEXT:    vnsrl.wi v8, v8, 0
 ; RVD-NEXT:    li a1, 127
 ; RVD-NEXT:    vsub.vx v8, v8, a1
 ; RVD-NEXT:    vse8.v v8, (a0)
@@ -864,11 +864,11 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RVF-NEXT:    vle16.v v8, (a0)
 ; RVF-NEXT:    vrsub.vi v9, v8, 0
-; RVF-NEXT:    vand.vv v10, v8, v9
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v10
-; RVF-NEXT:    vnsrl.wi v10, v8, 23
+; RVF-NEXT:    vand.vv v9, v8, v9
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v9
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
 ; RVF-NEXT:    li a1, 127
-; RVF-NEXT:    vsub.vx v8, v10, a1
+; RVF-NEXT:    vsub.vx v8, v8, a1
 ; RVF-NEXT:    vse16.v v8, (a0)
 ; RVF-NEXT:    ret
 ;
@@ -877,11 +877,11 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RVD-NEXT:    vle16.v v8, (a0)
 ; RVD-NEXT:    vrsub.vi v9, v8, 0
-; RVD-NEXT:    vand.vv v10, v8, v9
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
-; RVD-NEXT:    vnsrl.wi v10, v8, 23
+; RVD-NEXT:    vand.vv v9, v8, v9
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v9
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
 ; RVD-NEXT:    li a1, 127
-; RVD-NEXT:    vsub.vx v8, v10, a1
+; RVD-NEXT:    vsub.vx v8, v8, a1
 ; RVD-NEXT:    vse16.v v8, (a0)
 ; RVD-NEXT:    ret
 ;
@@ -951,11 +951,11 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    vle32.v v8, (a0)
 ; RVD-NEXT:    li a1, 52
 ; RVD-NEXT:    vrsub.vi v9, v8, 0
-; RVD-NEXT:    vand.vv v10, v8, v9
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
-; RVD-NEXT:    vnsrl.wx v10, v8, a1
+; RVD-NEXT:    vand.vv v9, v8, v9
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v9
+; RVD-NEXT:    vnsrl.wx v8, v8, a1
 ; RVD-NEXT:    li a1, 1023
-; RVD-NEXT:    vsub.vx v8, v10, a1
+; RVD-NEXT:    vsub.vx v8, v8, a1
 ; RVD-NEXT:    vse32.v v8, (a0)
 ; RVD-NEXT:    ret
 ;
@@ -1064,9 +1064,9 @@ define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    vrsub.vi v9, v8, 0
 ; RVF-NEXT:    vand.vv v8, v8, v9
 ; RVF-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RVF-NEXT:    vfncvt.f.xu.w v9, v8
+; RVF-NEXT:    vfncvt.f.xu.w v8, v8
 ; RVF-NEXT:    fsrm a1
-; RVF-NEXT:    vsrl.vi v8, v9, 23
+; RVF-NEXT:    vsrl.vi v8, v8, 23
 ; RVF-NEXT:    li a1, 127
 ; RVF-NEXT:    vwsubu.vx v9, v8, a1
 ; RVF-NEXT:    vse64.v v9, (a0)
@@ -1134,11 +1134,11 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    vrsub.vi v10, v8, 0
 ; RVF-NEXT:    vand.vv v8, v8, v10
 ; RVF-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; RVF-NEXT:    vzext.vf2 v16, v8
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v16
-; RVF-NEXT:    vnsrl.wi v16, v8, 23
+; RVF-NEXT:    vzext.vf2 v12, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v12
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
 ; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; RVF-NEXT:    vnsrl.wi v8, v16, 0
+; RVF-NEXT:    vnsrl.wi v8, v8, 0
 ; RVF-NEXT:    li a1, 127
 ; RVF-NEXT:    vsub.vx v8, v8, a1
 ; RVF-NEXT:    vse8.v v8, (a0)
@@ -1152,11 +1152,11 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    vrsub.vi v10, v8, 0
 ; RVD-NEXT:    vand.vv v8, v8, v10
 ; RVD-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; RVD-NEXT:    vzext.vf2 v16, v8
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v16
-; RVD-NEXT:    vnsrl.wi v16, v8, 23
+; RVD-NEXT:    vzext.vf2 v12, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
 ; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; RVD-NEXT:    vnsrl.wi v8, v16, 0
+; RVD-NEXT:    vnsrl.wi v8, v8, 0
 ; RVD-NEXT:    li a1, 127
 ; RVD-NEXT:    vsub.vx v8, v8, a1
 ; RVD-NEXT:    vse8.v v8, (a0)
@@ -1212,11 +1212,11 @@ define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RVF-NEXT:    vle16.v v8, (a0)
 ; RVF-NEXT:    vrsub.vi v10, v8, 0
-; RVF-NEXT:    vand.vv v12, v8, v10
-; RVF-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVF-NEXT:    vnsrl.wi v12, v8, 23
+; RVF-NEXT:    vand.vv v10, v8, v10
+; RVF-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVF-NEXT:    vnsrl.wi v8, v8, 23
 ; RVF-NEXT:    li a1, 127
-; RVF-NEXT:    vsub.vx v8, v12, a1
+; RVF-NEXT:    vsub.vx v8, v8, a1
 ; RVF-NEXT:    vse16.v v8, (a0)
 ; RVF-NEXT:    ret
 ;
@@ -1225,11 +1225,11 @@ define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RVD-NEXT:    vle16.v v8, (a0)
 ; RVD-NEXT:    vrsub.vi v10, v8, 0
-; RVD-NEXT:    vand.vv v12, v8, v10
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVD-NEXT:    vnsrl.wi v12, v8, 23
+; RVD-NEXT:    vand.vv v10, v8, v10
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVD-NEXT:    vnsrl.wi v8, v8, 23
 ; RVD-NEXT:    li a1, 127
-; RVD-NEXT:    vsub.vx v8, v12, a1
+; RVD-NEXT:    vsub.vx v8, v8, a1
 ; RVD-NEXT:    vse16.v v8, (a0)
 ; RVD-NEXT:    ret
 ;
@@ -1299,11 +1299,11 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
 ; RVD-NEXT:    vle32.v v8, (a0)
 ; RVD-NEXT:    li a1, 52
 ; RVD-NEXT:    vrsub.vi v10, v8, 0
-; RVD-NEXT:    vand.vv v12, v8, v10
-; RVD-NEXT:    vfwcvt.f.xu.v v8, v12
-; RVD-NEXT:    vnsrl.wx v12, v8, a1
+; RVD-NEXT:    vand.vv v10, v8, v10
+; RVD-NEXT:    vfwcvt.f.xu.v v8, v10
+; RVD-NEXT:    vnsrl.wx v8, v8, a1
 ; RVD-NEXT:    li a1, 1023
-; RVD-NEXT:    vsub.vx v8, v12, a1
+; RVD-NEXT:    vsub.vx v8, v8, a1
 ; RVD-NEXT:    vse32.v v8, (a0)
 ; RVD-NEXT:    ret
 ;
@@ -1412,11 +1412,11 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
 ; RVF-NEXT:    vrsub.vi v10, v8, 0
 ; RVF-NEXT:    vand.vv v8, v8, v10
 ; RVF-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RVF-NEXT:    vfncvt.f.xu.w v10, v8
+; RVF-NEXT:    vfncvt.f.xu.w v8, v8
 ; RVF-NEXT:    fsrm a1
-; RVF-NEXT:    vsrl.vi v10, v10, 23
+; RVF-NEXT:    vsrl.vi v9, v8, 23
 ; RVF-NEXT:    li a1, 127
-; RVF-NEXT:    vwsubu.vx v8, v10, a1
+; RVF-NEXT:    vwsubu.vx v8, v9, a1
 ; RVF-NEXT:    vse64.v v8, (a0)
 ; RVF-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
index bb2b57fbcc3b7..1ca8390a83c25 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
@@ -235,8 +235,8 @@ define <16 x i16> @sextload_v16i8_v16i16(ptr %x) {
 ; CHECK-LABEL: sextload_v16i8_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %y = load <16 x i8>, ptr %x
   %z = sext <16 x i8> %y to <16 x i16>
@@ -247,8 +247,8 @@ define <16 x i16> @zextload_v16i8_v16i16(ptr %x) {
 ; CHECK-LABEL: zextload_v16i8_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %y = load <16 x i8>, ptr %x
   %z = zext <16 x i8> %y to <16 x i16>
@@ -259,8 +259,8 @@ define <16 x i32> @sextload_v16i8_v16i32(ptr %x) {
 ; CHECK-LABEL: sextload_v16i8_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    vle8.v v11, (a0)
+; CHECK-NEXT:    vsext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %y = load <16 x i8>, ptr %x
   %z = sext <16 x i8> %y to <16 x i32>
@@ -271,8 +271,8 @@ define <16 x i32> @zextload_v16i8_v16i32(ptr %x) {
 ; CHECK-LABEL: zextload_v16i8_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vle8.v v11, (a0)
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %y = load <16 x i8>, ptr %x
   %z = zext <16 x i8> %y to <16 x i32>
@@ -283,8 +283,8 @@ define <16 x i64> @sextload_v16i8_v16i64(ptr %x) {
 ; CHECK-LABEL: sextload_v16i8_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vsext.vf8 v8, v16
+; CHECK-NEXT:    vle8.v v15, (a0)
+; CHECK-NEXT:    vsext.vf8 v8, v15
 ; CHECK-NEXT:    ret
   %y = load <16 x i8>, ptr %x
   %z = sext <16 x i8> %y to <16 x i64>
@@ -295,8 +295,8 @@ define <16 x i64> @zextload_v16i8_v16i64(ptr %x) {
 ; CHECK-LABEL: zextload_v16i8_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vzext.vf8 v8, v16
+; CHECK-NEXT:    vle8.v v15, (a0)
+; CHECK-NEXT:    vzext.vf8 v8, v15
 ; CHECK-NEXT:    ret
   %y = load <16 x i8>, ptr %x
   %z = zext <16 x i8> %y to <16 x i64>
@@ -460,8 +460,8 @@ define <8 x i32> @sextload_v8i16_v8i32(ptr %x) {
 ; CHECK-LABEL: sextload_v8i16_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %y = load <8 x i16>, ptr %x
   %z = sext <8 x i16> %y to <8 x i32>
@@ -472,8 +472,8 @@ define <8 x i32> @zextload_v8i16_v8i32(ptr %x) {
 ; CHECK-LABEL: zextload_v8i16_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %y = load <8 x i16>, ptr %x
   %z = zext <8 x i16> %y to <8 x i32>
@@ -484,8 +484,8 @@ define <8 x i64> @sextload_v8i16_v8i64(ptr %x) {
 ; CHECK-LABEL: sextload_v8i16_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vsext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %y = load <8 x i16>, ptr %x
   %z = sext <8 x i16> %y to <8 x i64>
@@ -496,8 +496,8 @@ define <8 x i64> @zextload_v8i16_v8i64(ptr %x) {
 ; CHECK-LABEL: zextload_v8i16_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %y = load <8 x i16>, ptr %x
   %z = zext <8 x i16> %y to <8 x i64>
@@ -508,8 +508,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v16i16_v16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vse8.v v10, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <16 x i16> %x to <16 x i8>
   store <16 x i8> %y, ptr %z
@@ -520,8 +520,8 @@ define <16 x i32> @sextload_v16i16_v16i32(ptr %x) {
 ; CHECK-LABEL: sextload_v16i16_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %y = load <16 x i16>, ptr %x
   %z = sext <16 x i16> %y to <16 x i32>
@@ -532,8 +532,8 @@ define <16 x i32> @zextload_v16i16_v16i32(ptr %x) {
 ; CHECK-LABEL: zextload_v16i16_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %y = load <16 x i16>, ptr %x
   %z = zext <16 x i16> %y to <16 x i32>
@@ -544,8 +544,8 @@ define <16 x i64> @sextload_v16i16_v16i64(ptr %x) {
 ; CHECK-LABEL: sextload_v16i16_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    vle16.v v14, (a0)
+; CHECK-NEXT:    vsext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %y = load <16 x i16>, ptr %x
   %z = sext <16 x i16> %y to <16 x i64>
@@ -556,8 +556,8 @@ define <16 x i64> @zextload_v16i16_v16i64(ptr %x) {
 ; CHECK-LABEL: zextload_v16i16_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vzext.vf4 v8, v16
+; CHECK-NEXT:    vle16.v v14, (a0)
+; CHECK-NEXT:    vzext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %y = load <16 x i16>, ptr %x
   %z = zext <16 x i16> %y to <16 x i64>
@@ -644,8 +644,8 @@ define <4 x i64> @sextload_v4i32_v4i64(ptr %x) {
 ; CHECK-LABEL: sextload_v4i32_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %y = load <4 x i32>, ptr %x
   %z = sext <4 x i32> %y to <4 x i64>
@@ -656,8 +656,8 @@ define <4 x i64> @zextload_v4i32_v4i64(ptr %x) {
 ; CHECK-LABEL: zextload_v4i32_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %y = load <4 x i32>, ptr %x
   %z = zext <4 x i32> %y to <4 x i64>
@@ -668,9 +668,9 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v8i32_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <8 x i32> %x to <8 x i8>
@@ -682,8 +682,8 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v8i32_v8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <8 x i32> %x to <8 x i16>
   store <8 x i16> %y, ptr %z
@@ -694,8 +694,8 @@ define <8 x i64> @sextload_v8i32_v8i64(ptr %x) {
 ; CHECK-LABEL: sextload_v8i32_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %y = load <8 x i32>, ptr %x
   %z = sext <8 x i32> %y to <8 x i64>
@@ -706,8 +706,8 @@ define <8 x i64> @zextload_v8i32_v8i64(ptr %x) {
 ; CHECK-LABEL: zextload_v8i32_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %y = load <8 x i32>, ptr %x
   %z = zext <8 x i32> %y to <8 x i64>
@@ -718,9 +718,9 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v16i32_v16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <16 x i32> %x to <16 x i8>
@@ -732,8 +732,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v16i32_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
-; CHECK-NEXT:    vse16.v v12, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <16 x i32> %x to <16 x i16>
   store <16 x i16> %y, ptr %z
@@ -744,8 +744,8 @@ define <16 x i64> @sextload_v16i32_v16i64(ptr %x) {
 ; CHECK-LABEL: sextload_v16i32_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vsext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %y = load <16 x i32>, ptr %x
   %z = sext <16 x i32> %y to <16 x i64>
@@ -756,8 +756,8 @@ define <16 x i64> @zextload_v16i32_v16i64(ptr %x) {
 ; CHECK-LABEL: zextload_v16i32_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %y = load <16 x i32>, ptr %x
   %z = zext <16 x i32> %y to <16 x i64>
@@ -810,9 +810,9 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v4i64_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a0)
@@ -826,9 +826,9 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v4i64_v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <4 x i64> %x to <4 x i16>
@@ -840,8 +840,8 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v4i64_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vse32.v v10, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <4 x i64> %x to <4 x i32>
   store <4 x i32> %y, ptr %z
@@ -852,9 +852,9 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v8i64_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a0)
@@ -868,9 +868,9 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v8i64_v8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <8 x i64> %x to <8 x i16>
@@ -882,8 +882,8 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v8i64_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
-; CHECK-NEXT:    vse32.v v12, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <8 x i64> %x to <8 x i32>
   store <8 x i32> %y, ptr %z
@@ -894,12 +894,12 @@ define void @truncstore_v16i64_v16i8(<16 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v16i64_v16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vse8.v v10, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <16 x i64> %x to <16 x i8>
   store <16 x i8> %y, ptr %z
@@ -910,9 +910,9 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v16i64_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <16 x i64> %x to <16 x i16>
@@ -924,8 +924,8 @@ define void @truncstore_v16i64_v16i32(<16 x i64> %x, ptr %z) {
 ; CHECK-LABEL: truncstore_v16i64_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
-; CHECK-NEXT:    vse32.v v16, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <16 x i64> %x to <16 x i32>
   store <16 x i32> %y, ptr %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
index 2587411566a3f..216054cb27397 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -154,9 +154,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    vmseq.vi v10, v8, 0
+; RV32-NEXT:    vmseq.vi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.x.s a0, v10
+; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    srl a0, a0, a1
 ; RV32-NEXT:    andi a0, a0, 1
 ; RV32-NEXT:    ret
@@ -166,9 +166,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT:    li a2, 32
 ; RV64-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    vmseq.vi v10, v8, 0
+; RV64-NEXT:    vmseq.vi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v10
+; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    srl a0, a0, a1
 ; RV64-NEXT:    andi a0, a0, 1
 ; RV64-NEXT:    ret
@@ -178,9 +178,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT:    li a2, 32
 ; RV32ZBS-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; RV32ZBS-NEXT:    vle8.v v8, (a0)
-; RV32ZBS-NEXT:    vmseq.vi v10, v8, 0
+; RV32ZBS-NEXT:    vmseq.vi v8, v8, 0
 ; RV32ZBS-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; RV32ZBS-NEXT:    vmv.x.s a0, v10
+; RV32ZBS-NEXT:    vmv.x.s a0, v8
 ; RV32ZBS-NEXT:    bext a0, a0, a1
 ; RV32ZBS-NEXT:    ret
 ;
@@ -189,9 +189,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT:    li a2, 32
 ; RV64ZBS-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; RV64ZBS-NEXT:    vle8.v v8, (a0)
-; RV64ZBS-NEXT:    vmseq.vi v10, v8, 0
+; RV64ZBS-NEXT:    vmseq.vi v8, v8, 0
 ; RV64ZBS-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; RV64ZBS-NEXT:    vmv.x.s a0, v10
+; RV64ZBS-NEXT:    vmv.x.s a0, v8
 ; RV64ZBS-NEXT:    bext a0, a0, a1
 ; RV64ZBS-NEXT:    ret
   %a = load <32 x i8>, ptr %x
@@ -206,10 +206,10 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT:    li a2, 64
 ; RV32-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    vmseq.vi v12, v8, 0
+; RV32-NEXT:    vmseq.vi v8, v8, 0
 ; RV32-NEXT:    srli a0, a1, 5
 ; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT:    vslidedown.vx v8, v12, a0
+; RV32-NEXT:    vslidedown.vx v8, v8, a0
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    srl a0, a0, a1
 ; RV32-NEXT:    andi a0, a0, 1
@@ -220,9 +220,9 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT:    li a2, 64
 ; RV64-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    vmseq.vi v12, v8, 0
+; RV64-NEXT:    vmseq.vi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    srl a0, a0, a1
 ; RV64-NEXT:    andi a0, a0, 1
 ; RV64-NEXT:    ret
@@ -232,10 +232,10 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT:    li a2, 64
 ; RV32ZBS-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; RV32ZBS-NEXT:    vle8.v v8, (a0)
-; RV32ZBS-NEXT:    vmseq.vi v12, v8, 0
+; RV32ZBS-NEXT:    vmseq.vi v8, v8, 0
 ; RV32ZBS-NEXT:    srli a0, a1, 5
 ; RV32ZBS-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32ZBS-NEXT:    vslidedown.vx v8, v12, a0
+; RV32ZBS-NEXT:    vslidedown.vx v8, v8, a0
 ; RV32ZBS-NEXT:    vmv.x.s a0, v8
 ; RV32ZBS-NEXT:    bext a0, a0, a1
 ; RV32ZBS-NEXT:    ret
@@ -245,9 +245,9 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT:    li a2, 64
 ; RV64ZBS-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; RV64ZBS-NEXT:    vle8.v v8, (a0)
-; RV64ZBS-NEXT:    vmseq.vi v12, v8, 0
+; RV64ZBS-NEXT:    vmseq.vi v8, v8, 0
 ; RV64ZBS-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
-; RV64ZBS-NEXT:    vmv.x.s a0, v12
+; RV64ZBS-NEXT:    vmv.x.s a0, v8
 ; RV64ZBS-NEXT:    bext a0, a0, a1
 ; RV64ZBS-NEXT:    ret
   %a = load <64 x i8>, ptr %x
@@ -262,10 +262,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT:    li a2, 128
 ; RV32-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    vmseq.vi v16, v8, 0
+; RV32-NEXT:    vmseq.vi v8, v8, 0
 ; RV32-NEXT:    srli a0, a1, 5
 ; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vx v8, v16, a0
+; RV32-NEXT:    vslidedown.vx v8, v8, a0
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    srl a0, a0, a1
 ; RV32-NEXT:    andi a0, a0, 1
@@ -276,10 +276,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT:    li a2, 128
 ; RV64-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    vmseq.vi v16, v8, 0
+; RV64-NEXT:    vmseq.vi v8, v8, 0
 ; RV64-NEXT:    srli a0, a1, 6
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vslidedown.vx v8, v16, a0
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    srl a0, a0, a1
 ; RV64-NEXT:    andi a0, a0, 1
@@ -290,10 +290,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT:    li a2, 128
 ; RV32ZBS-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; RV32ZBS-NEXT:    vle8.v v8, (a0)
-; RV32ZBS-NEXT:    vmseq.vi v16, v8, 0
+; RV32ZBS-NEXT:    vmseq.vi v8, v8, 0
 ; RV32ZBS-NEXT:    srli a0, a1, 5
 ; RV32ZBS-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZBS-NEXT:    vslidedown.vx v8, v16, a0
+; RV32ZBS-NEXT:    vslidedown.vx v8, v8, a0
 ; RV32ZBS-NEXT:    vmv.x.s a0, v8
 ; RV32ZBS-NEXT:    bext a0, a0, a1
 ; RV32ZBS-NEXT:    ret
@@ -303,10 +303,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT:    li a2, 128
 ; RV64ZBS-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; RV64ZBS-NEXT:    vle8.v v8, (a0)
-; RV64ZBS-NEXT:    vmseq.vi v16, v8, 0
+; RV64ZBS-NEXT:    vmseq.vi v8, v8, 0
 ; RV64ZBS-NEXT:    srli a0, a1, 6
 ; RV64ZBS-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64ZBS-NEXT:    vslidedown.vx v8, v16, a0
+; RV64ZBS-NEXT:    vslidedown.vx v8, v8, a0
 ; RV64ZBS-NEXT:    vmv.x.s a0, v8
 ; RV64ZBS-NEXT:    bext a0, a0, a1
 ; RV64ZBS-NEXT:    ret
@@ -527,8 +527,8 @@ define i1 @extractelt_v32i1_idx0(ptr %x) nounwind {
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v10, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v10
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
@@ -543,8 +543,8 @@ define i1 @extractelt_v64i1_idx0(ptr %x) nounwind {
 ; CHECK-NEXT:    li a1, 64
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v12, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v12
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
@@ -559,8 +559,8 @@ define i1 @extractelt_v128i1_idx0(ptr %x) nounwind {
 ; CHECK-NEXT:    li a1, 128
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v16, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v16
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
@@ -575,8 +575,8 @@ define i1 @extractelt_v256i1_idx0(ptr %x) nounwind {
 ; CHECK-NEXT:    li a1, 128
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v16, v8, 0
-; CHECK-NEXT:    vfirst.m a0, v16
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vfirst.m a0, v8
 ; CHECK-NEXT:    seqz a0, a0
 ; CHECK-NEXT:    ret
   %a = load <256 x i8>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index dba5d26c216fa..5121e9b759089 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -543,8 +543,8 @@ define bfloat @extractelt_v8bf16_idx(<8 x bfloat> %a, i32 zeroext %idx) nounwind
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfadd.vv v8, v10, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vslidedown.vx v8, v10, a0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
@@ -556,8 +556,8 @@ define bfloat @extractelt_v8bf16_idx(<8 x bfloat> %a, i32 zeroext %idx) nounwind
 ; VISNI-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; VISNI-NEXT:    vfadd.vv v8, v10, v10
 ; VISNI-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; VISNI-NEXT:    vfncvtbf16.f.f.w v10, v8
-; VISNI-NEXT:    vslidedown.vx v8, v10, a0
+; VISNI-NEXT:    vfncvtbf16.f.f.w v8, v8
+; VISNI-NEXT:    vslidedown.vx v8, v8, a0
 ; VISNI-NEXT:    vmv.x.s a0, v8
 ; VISNI-NEXT:    fmv.h.x fa0, a0
 ; VISNI-NEXT:    ret
@@ -582,8 +582,8 @@ define half @extractelt_v8f16_idx(<8 x half> %a, i32 zeroext %idx) nounwind {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vslidedown.vx v8, v10, a0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vslidedown.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vmv.x.s a0, v8
 ; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
 ; ZVFHMIN-NEXT:    ret
@@ -740,8 +740,8 @@ define bfloat @extractelt_v16bf16_idx(<16 x bfloat> %a, i32 zeroext %idx) nounwi
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfadd.vv v8, v12, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
-; CHECK-NEXT:    vslidedown.vx v8, v12, a0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
@@ -753,8 +753,8 @@ define bfloat @extractelt_v16bf16_idx(<16 x bfloat> %a, i32 zeroext %idx) nounwi
 ; VISNI-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; VISNI-NEXT:    vfadd.vv v8, v12, v12
 ; VISNI-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; VISNI-NEXT:    vfncvtbf16.f.f.w v12, v8
-; VISNI-NEXT:    vslidedown.vx v8, v12, a0
+; VISNI-NEXT:    vfncvtbf16.f.f.w v8, v8
+; VISNI-NEXT:    vslidedown.vx v8, v8, a0
 ; VISNI-NEXT:    vmv.x.s a0, v8
 ; VISNI-NEXT:    fmv.h.x fa0, a0
 ; VISNI-NEXT:    ret
@@ -779,8 +779,8 @@ define half @extractelt_v16f16_idx(<16 x half> %a, i32 zeroext %idx) nounwind {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT:    vslidedown.vx v8, v12, a0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vslidedown.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vmv.x.s a0, v8
 ; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
index 6fc0165d7e77f..1078acc69fbfd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
@@ -271,21 +271,19 @@ declare <16 x half> @llvm.vp.floor.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_floor_v16f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 2
-; ZVFH-NEXT:    vmv1r.v v0, v10
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_floor_v16f16:
@@ -444,20 +442,18 @@ define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev
 ; CHECK-LABEL: vp_floor_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -488,20 +484,18 @@ define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vp_floor_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -573,21 +567,19 @@ declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_v4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -617,21 +609,19 @@ declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_v8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -661,21 +651,19 @@ declare <15 x double> @llvm.vp.floor.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_v15f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -705,21 +693,19 @@ declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_v16f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -749,16 +735,14 @@ declare <32 x double> @llvm.vp.floor.v32f64(<32 x double>, <32 x i1>, i32)
 define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_v32f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv1r.v v6, v0
 ; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v7, v0, 2
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    bltu a0, a2, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v8, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
@@ -768,9 +752,8 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a1, 2
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
 ; CHECK-NEXT:    fsrm a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
index 2bf039bd0104a..08691a2e740c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
@@ -37,9 +37,9 @@ define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11, v0.t
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v11, v9, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.maximum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
   ret <2 x half> %v
@@ -68,9 +68,9 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.maximum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x half> %v
@@ -105,9 +105,9 @@ define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11, v0.t
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v11, v9, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.maximum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -136,9 +136,9 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.maximum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -167,17 +167,15 @@ define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v14, v12, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v14, v14, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v12, v14, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.maximum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl)
   ret <8 x half> %v
@@ -201,14 +199,14 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.maximum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x half> %v
@@ -221,12 +219,10 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vfmax.vv v8, v8, v14, v0.t
@@ -239,17 +235,15 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v20, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v20, v20, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v20, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.maximum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -273,14 +267,14 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
-; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v12, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.maximum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x half> %v
@@ -359,12 +353,10 @@ define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmax.vv v8, v8, v14, v0.t
@@ -394,12 +386,10 @@ define <16 x float> @vfmax_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmax.vv v8, v8, v20, v0.t
@@ -462,12 +452,10 @@ define <4 x double> @vfmax_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmax.vv v8, v8, v14, v0.t
@@ -497,12 +485,10 @@ define <8 x double> @vfmax_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmax.vv v8, v8, v20, v0.t
@@ -530,32 +516,15 @@ declare <16 x double> @llvm.vp.maximum.v16f64(<16 x double>, <16 x double>, <16
 define <16 x double> @vfmax_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmax_vv_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfmax.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -584,25 +553,25 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 5
+; CHECK-NEXT:    li a3, 24
+; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v6, v0
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 24
-; CHECK-NEXT:    mul a1, a1, a3
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vle64.v v16, (a1)
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vle64.v v16, (a0)
+; CHECK-NEXT:    vle64.v v24, (a0)
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:    mv a0, a2
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
@@ -613,29 +582,14 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:  .LBB24_2:
 ; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v26, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
-; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v24, v24, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    addi a0, a2, -16
 ; CHECK-NEXT:    sltu a1, a2, a0
@@ -643,36 +597,28 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    and a0, a1, a0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a2, 24
-; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmerge.vvm v16, v8, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v16, v24, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vfmax.vv v16, v16, v24, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 5
+; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index c6cd366497218..660cb0b193926 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -33,9 +33,9 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %v
@@ -66,9 +66,9 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
   ret <4 x half> %v
@@ -94,14 +94,14 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.maximum.v8f16(<8 x half> %a, <8 x half> %b)
   ret <8 x half> %v
@@ -127,14 +127,14 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
-; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v12, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.maximum.v16f16(<16 x half> %a, <16 x half> %b)
   ret <16 x half> %v
@@ -282,9 +282,9 @@ define <2 x half> @vfmax_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %v
@@ -309,15 +309,15 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v10
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v8, v9, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v8, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v9, v8, v10, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v8, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %c = fadd nnan <2 x half> %a, %a
   %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %b)
@@ -343,15 +343,15 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v9, v8, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v9, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v10, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v9, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %c = fadd nnan <2 x half> %b, %b
   %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c)
@@ -382,11 +382,11 @@ define <4 x half> @vfmax_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
 ; ZVFHMIN-NEXT:    vfadd.vv v9, v11, v11
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v9
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vi v11, v9, 2
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT:    vslideup.vi v9, v8, 2
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
@@ -395,9 +395,9 @@ define <4 x half> @vfmax_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
 ; ZVFHMIN-NEXT:    vmerge.vvm v10, v8, v9, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v8, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %d = fadd nnan <2 x half> %a, %a
   %e = fadd nnan <2 x half> %b, %b
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
index c8cea368f905e..1736fd2b1eab9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
@@ -103,9 +103,9 @@ define <2 x half> @max_v2f16(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %c = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> %a, <2 x half> %b)
@@ -125,9 +125,9 @@ define <4 x half> @max_v4f16(<4 x half> %a, <4 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %c = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> %a, <4 x half> %b)
@@ -147,9 +147,9 @@ define <8 x half> @max_v8f16(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %c = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> %a, <8 x half> %b)
@@ -169,9 +169,9 @@ define <9 x half> @max_v9f16(<9 x half> %a, <9 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %c = call <9 x half> @llvm.maximumnum.v9f16(<9 x half> %a, <9 x half> %b)
@@ -191,9 +191,9 @@ define <16 x half> @max_v16f16(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %c = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> %a, <16 x half> %b)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
index 73d83e86af4c6..2aa7a68c88475 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
@@ -37,9 +37,9 @@ define <2 x half> @vfmin_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11, v0.t
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v11, v9, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.minimum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
   ret <2 x half> %v
@@ -68,9 +68,9 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.minimum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x half> %v
@@ -105,9 +105,9 @@ define <4 x half> @vfmin_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11, v0.t
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v11, v9, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.minimum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -136,9 +136,9 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.minimum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -167,17 +167,15 @@ define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v14, v12, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v14, v14, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v12, v14, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmin.vv v12, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.minimum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl)
   ret <8 x half> %v
@@ -201,14 +199,14 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.minimum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x half> %v
@@ -221,12 +219,10 @@ define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vfmin.vv v8, v8, v14, v0.t
@@ -239,17 +235,15 @@ define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v20, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v20, v20, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v20, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.minimum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -273,14 +267,14 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
-; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v12, v0
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.minimum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x half> %v
@@ -359,12 +353,10 @@ define <8 x float> @vfmin_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmin.vv v8, v8, v14, v0.t
@@ -394,12 +386,10 @@ define <16 x float> @vfmin_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmin.vv v8, v8, v20, v0.t
@@ -462,12 +452,10 @@ define <4 x double> @vfmin_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmin.vv v8, v8, v14, v0.t
@@ -497,12 +485,10 @@ define <8 x double> @vfmin_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmin.vv v8, v8, v20, v0.t
@@ -530,32 +516,15 @@ declare <16 x double> @llvm.vp.minimum.v16f64(<16 x double>, <16 x double>, <16
 define <16 x double> @vfmin_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmin_vv_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfmin.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.minimum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -584,25 +553,25 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 5
+; CHECK-NEXT:    li a3, 24
+; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v6, v0
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 24
-; CHECK-NEXT:    mul a1, a1, a3
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vle64.v v16, (a1)
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vle64.v v16, (a0)
+; CHECK-NEXT:    vle64.v v24, (a0)
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:    mv a0, a2
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
@@ -613,29 +582,14 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:  .LBB24_2:
 ; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v26, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
-; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v24, v24, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfmin.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    addi a0, a2, -16
 ; CHECK-NEXT:    sltu a1, a2, a0
@@ -643,36 +597,28 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
 ; CHECK-NEXT:    and a0, a1, a0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a2, 24
-; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmerge.vvm v16, v8, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v16, v24, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vfmin.vv v16, v16, v24, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 5
+; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index 568923db83591..02c0cb795ecc1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -33,9 +33,9 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %v
@@ -66,9 +66,9 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) {
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
   ret <4 x half> %v
@@ -94,14 +94,14 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b)
   ret <8 x half> %v
@@ -127,14 +127,14 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
-; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v12, v0
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.minimum.v16f16(<16 x half> %a, <16 x half> %b)
   ret <16 x half> %v
@@ -282,9 +282,9 @@ define <2 x half> @vfmin_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call nnan <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %v
@@ -309,15 +309,15 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v10
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v8, v9, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v8, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v9, v8, v10, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v8, v0
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %c = fadd nnan <2 x half> %a, %a
   %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %c, <2 x half> %b)
@@ -343,15 +343,15 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v9, v8, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v9, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v10, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v9, v0
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %c = fadd nnan <2 x half> %b, %b
   %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %c)
@@ -382,11 +382,11 @@ define <4 x half> @vfmin_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
 ; ZVFHMIN-NEXT:    vfadd.vv v9, v11, v11
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v9
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vi v11, v9, 2
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT:    vslideup.vi v9, v8, 2
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
@@ -395,9 +395,9 @@ define <4 x half> @vfmin_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
 ; ZVFHMIN-NEXT:    vmerge.vvm v10, v8, v9, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v8, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %d = fadd nnan <2 x half> %a, %a
   %e = fadd nnan <2 x half> %b, %b
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
index 36114d56aa0d6..fd8b92546721d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
@@ -103,9 +103,9 @@ define <2 x half> @min_v2f16(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %c = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %a, <2 x half> %b)
@@ -125,9 +125,9 @@ define <4 x half> @min_v4f16(<4 x half> %a, <4 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %c = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> %a, <4 x half> %b)
@@ -147,9 +147,9 @@ define <8 x half> @min_v8f16(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %c = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> %a, <8 x half> %b)
@@ -169,9 +169,9 @@ define <9 x half> @min_v9f16(<9 x half> %a, <9 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %c = call <9 x half> @llvm.minimumnum.v9f16(<9 x half> %a, <9 x half> %b)
@@ -191,9 +191,9 @@ define <16 x half> @min_v16f16(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %c = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %a, <16 x half> %b)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
index 78ad10033feaa..5b03db02a5a2a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
@@ -36,8 +36,8 @@ define void @fpext_v8f16_v8f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: fpext_v8f16_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    vse32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x half>, ptr %x
@@ -51,9 +51,9 @@ define void @fpext_v8f16_v8f64(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    vse64.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x half>, ptr %x
@@ -67,8 +67,8 @@ define void @fpround_v2f32_v2f16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vse16.v v9, (a1)
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x float>, ptr %x
   %d = fptrunc <2 x float> %a to <2 x half>
@@ -81,9 +81,9 @@ define void @fpround_v2f64_v2f16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x double>, ptr %x
@@ -97,8 +97,8 @@ define void @fpround_v8f32_v8f16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a1)
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %d = fptrunc <8 x float> %a to <8 x half>
@@ -111,9 +111,9 @@ define void @fpround_v8f64_v8f16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
index abb929eaaf6e6..7565cd74471f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
@@ -17,13 +17,13 @@ define void @fcmp_oeq_vv_v8f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-LABEL: fcmp_oeq_vv_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v10, v8
-; ZVFHMIN-NEXT:    vsm.v v12, (a2)
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v8
+; ZVFHMIN-NEXT:    vsm.v v8, (a2)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -45,13 +45,13 @@ define void @fcmp_oeq_vv_v8f16_nonans(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-LABEL: fcmp_oeq_vv_v8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v10, v8
-; ZVFHMIN-NEXT:    vsm.v v12, (a2)
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v8
+; ZVFHMIN-NEXT:    vsm.v v8, (a2)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -166,20 +166,20 @@ define void @fcmp_olt_vv_v16f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
 ; ZVFH-NEXT:    vle16.v v10, (a1)
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10
-; ZVFH-NEXT:    vsm.v v12, (a2)
+; ZVFH-NEXT:    vmflt.vv v8, v8, v10
+; ZVFH-NEXT:    vsm.v v8, (a2)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v12, (a1)
-; ZVFHMIN-NEXT:    vle16.v v16, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-NEXT:    vle16.v v14, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v16, v12, v8
-; ZVFHMIN-NEXT:    vsm.v v16, (a2)
+; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v8
+; ZVFHMIN-NEXT:    vsm.v v8, (a2)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
@@ -194,20 +194,20 @@ define void @fcmp_olt_vv_v16f16_nonans(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
 ; ZVFH-NEXT:    vle16.v v10, (a1)
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10
-; ZVFH-NEXT:    vsm.v v12, (a2)
+; ZVFH-NEXT:    vmflt.vv v8, v8, v10
+; ZVFH-NEXT:    vsm.v v8, (a2)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16_nonans:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v12, (a1)
-; ZVFHMIN-NEXT:    vle16.v v16, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-NEXT:    vle16.v v14, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v16, v12, v8
-; ZVFHMIN-NEXT:    vsm.v v16, (a2)
+; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v8
+; ZVFHMIN-NEXT:    vsm.v v8, (a2)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
@@ -222,8 +222,8 @@ define void @fcmp_oge_vv_v8f32(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v10, (a1)
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmfle.vv v8, v10, v8
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = load <8 x float>, ptr %y
@@ -238,8 +238,8 @@ define void @fcmp_oge_vv_v8f32_nonans(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v10, (a1)
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmfle.vv v8, v10, v8
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = load <8 x float>, ptr %y
@@ -305,8 +305,8 @@ define void @fcmp_ule_vv_v32f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
 ; ZVFH-NEXT:    vle16.v v12, (a1)
-; ZVFH-NEXT:    vmflt.vv v16, v12, v8
-; ZVFH-NEXT:    vmnot.m v8, v16
+; ZVFH-NEXT:    vmflt.vv v8, v12, v8
+; ZVFH-NEXT:    vmnot.m v8, v8
 ; ZVFH-NEXT:    vsm.v v8, (a2)
 ; ZVFH-NEXT:    ret
 ;
@@ -314,13 +314,13 @@ define void @fcmp_ule_vv_v32f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    li a3, 32
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v16, (a0)
-; ZVFHMIN-NEXT:    vle16.v v24, (a1)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vle16.v v12, (a0)
+; ZVFHMIN-NEXT:    vle16.v v20, (a1)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v24, v16, v8
-; ZVFHMIN-NEXT:    vmnot.m v8, v24
+; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v8
+; ZVFHMIN-NEXT:    vmnot.m v8, v8
 ; ZVFHMIN-NEXT:    vsm.v v8, (a2)
 ; ZVFHMIN-NEXT:    ret
   %a = load <32 x half>, ptr %x
@@ -337,21 +337,21 @@ define void @fcmp_ule_vv_v32f16_nonans(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
 ; ZVFH-NEXT:    vle16.v v12, (a1)
-; ZVFH-NEXT:    vmfle.vv v16, v8, v12
-; ZVFH-NEXT:    vsm.v v16, (a2)
+; ZVFH-NEXT:    vmfle.vv v8, v8, v12
+; ZVFH-NEXT:    vsm.v v8, (a2)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vv_v32f16_nonans:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    li a3, 32
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v16, (a1)
-; ZVFHMIN-NEXT:    vle16.v v24, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vle16.v v12, (a1)
+; ZVFHMIN-NEXT:    vle16.v v20, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v24, v16, v8
-; ZVFHMIN-NEXT:    vsm.v v24, (a2)
+; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v8
+; ZVFHMIN-NEXT:    vsm.v v8, (a2)
 ; ZVFHMIN-NEXT:    ret
   %a = load <32 x half>, ptr %x
   %b = load <32 x half>, ptr %y
@@ -366,8 +366,8 @@ define void @fcmp_uge_vv_v16f32(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v12, (a1)
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v8, v16
+; CHECK-NEXT:    vmflt.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v8, v8
 ; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
@@ -383,8 +383,8 @@ define void @fcmp_uge_vv_v16f32_nonans(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v12, (a1)
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmfle.vv v8, v12, v8
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
   %b = load <16 x float>, ptr %y
@@ -399,8 +399,8 @@ define void @fcmp_ult_vv_v8f64(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vle64.v v12, (a1)
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v8, v16
+; CHECK-NEXT:    vmfle.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v8, v8
 ; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
@@ -416,8 +416,8 @@ define void @fcmp_ult_vv_v8f64_nonans(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vle64.v v12, (a1)
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmflt.vv v8, v8, v12
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
   %b = load <8 x double>, ptr %y
@@ -433,8 +433,8 @@ define void @fcmp_ugt_vv_v64f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
 ; ZVFH-NEXT:    vle16.v v16, (a1)
-; ZVFH-NEXT:    vmfle.vv v24, v8, v16
-; ZVFH-NEXT:    vmnot.m v8, v24
+; ZVFH-NEXT:    vmfle.vv v8, v8, v16
+; ZVFH-NEXT:    vmnot.m v8, v8
 ; ZVFH-NEXT:    vsm.v v8, (a2)
 ; ZVFH-NEXT:    ret
   %a = load <64 x half>, ptr %x
@@ -451,8 +451,8 @@ define void @fcmp_ugt_vv_v64f16_nonans(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
 ; ZVFH-NEXT:    vle16.v v16, (a1)
-; ZVFH-NEXT:    vmflt.vv v24, v16, v8
-; ZVFH-NEXT:    vsm.v v24, (a2)
+; ZVFH-NEXT:    vmflt.vv v8, v16, v8
+; ZVFH-NEXT:    vsm.v v8, (a2)
 ; ZVFH-NEXT:    ret
   %a = load <64 x half>, ptr %x
   %b = load <64 x half>, ptr %y
@@ -469,8 +469,8 @@ define void @fcmp_ueq_vv_v32f32(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v16, (a1)
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmnor.mm v8, v25, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnor.mm v8, v8, v24
 ; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
@@ -487,8 +487,8 @@ define void @fcmp_ueq_vv_v32f32_nonans(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v16, (a1)
-; CHECK-NEXT:    vmfeq.vv v24, v8, v16
-; CHECK-NEXT:    vsm.v v24, (a2)
+; CHECK-NEXT:    vmfeq.vv v8, v8, v16
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
   %b = load <32 x float>, ptr %y
@@ -504,8 +504,8 @@ define void @fcmp_one_vv_v8f64(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vle64.v v16, (a1)
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmor.mm v8, v25, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmor.mm v8, v8, v24
 ; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
@@ -521,8 +521,8 @@ define void @fcmp_one_vv_v8f64_nonans(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vle64.v v16, (a1)
-; CHECK-NEXT:    vmfne.vv v24, v8, v16
-; CHECK-NEXT:    vsm.v v24, (a2)
+; CHECK-NEXT:    vmfne.vv v8, v8, v16
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
   %b = load <16 x double>, ptr %y
@@ -643,14 +643,14 @@ define void @fcmp_oeq_vf_v8f16(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-LABEL: fcmp_oeq_vf_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v8, v10
-; ZVFHMIN-NEXT:    vsm.v v12, (a1)
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v8, v10
+; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -672,14 +672,14 @@ define void @fcmp_oeq_vf_v8f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-LABEL: fcmp_oeq_vf_v8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v8, v10
-; ZVFHMIN-NEXT:    vsm.v v12, (a1)
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v8, v10
+; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -794,21 +794,21 @@ define void @fcmp_olt_vf_v16f16(ptr %x, half %y, ptr %z) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0
-; ZVFH-NEXT:    vsm.v v10, (a1)
+; ZVFH-NEXT:    vmflt.vf v8, v8, fa0
+; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
+; ZVFHMIN-NEXT:    vle16.v v10, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vmv.v.x v14, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v16, v8, v12
-; ZVFHMIN-NEXT:    vsm.v v16, (a1)
+; ZVFHMIN-NEXT:    vmflt.vv v8, v8, v12
+; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = insertelement <16 x half> poison, half %y, i32 0
@@ -823,21 +823,21 @@ define void @fcmp_olt_vf_v16f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0
-; ZVFH-NEXT:    vsm.v v10, (a1)
+; ZVFH-NEXT:    vmflt.vf v8, v8, fa0
+; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_v16f16_nonans:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
+; ZVFHMIN-NEXT:    vle16.v v10, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vmv.v.x v14, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v16, v8, v12
-; ZVFHMIN-NEXT:    vsm.v v16, (a1)
+; ZVFHMIN-NEXT:    vmflt.vv v8, v8, v12
+; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = insertelement <16 x half> poison, half %y, i32 0
@@ -852,8 +852,8 @@ define void @fcmp_oge_vf_v8f32(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vsm.v v10, (a1)
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> poison, float %y, i32 0
@@ -868,8 +868,8 @@ define void @fcmp_oge_vf_v8f32_nonans(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vsm.v v10, (a1)
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> poison, float %y, i32 0
@@ -935,8 +935,8 @@ define void @fcmp_ule_vf_v32f16(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 32
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfgt.vf v12, v8, fa0
-; ZVFH-NEXT:    vmnot.m v8, v12
+; ZVFH-NEXT:    vmfgt.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnot.m v8, v8
 ; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
 ;
@@ -944,14 +944,14 @@ define void @fcmp_ule_vf_v32f16(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    li a2, 32
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v16, (a0)
+; ZVFHMIN-NEXT:    vle16.v v12, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v24, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vmv.v.x v20, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v24, v16, v8
-; ZVFHMIN-NEXT:    vmnot.m v8, v24
+; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v8
+; ZVFHMIN-NEXT:    vmnot.m v8, v8
 ; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <32 x half>, ptr %x
@@ -968,22 +968,22 @@ define void @fcmp_ule_vf_v32f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 32
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfle.vf v12, v8, fa0
-; ZVFH-NEXT:    vsm.v v12, (a1)
+; ZVFH-NEXT:    vmfle.vf v8, v8, fa0
+; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_v32f16_nonans:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    li a2, 32
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v16, (a0)
+; ZVFHMIN-NEXT:    vle16.v v12, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v24, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vmv.v.x v20, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v24, v8, v16
-; ZVFHMIN-NEXT:    vsm.v v24, (a1)
+; ZVFHMIN-NEXT:    vmfle.vv v8, v8, v16
+; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <32 x half>, ptr %x
   %b = insertelement <32 x half> poison, half %y, i32 0
@@ -998,8 +998,8 @@ define void @fcmp_uge_vf_v16f32(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v8, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
@@ -1015,8 +1015,8 @@ define void @fcmp_uge_vf_v16f32_nonans(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
   %b = insertelement <16 x float> poison, float %y, i32 0
@@ -1031,8 +1031,8 @@ define void @fcmp_ult_vf_v8f64(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v8, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
@@ -1048,8 +1048,8 @@ define void @fcmp_ult_vf_v8f64_nonans(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
   %b = insertelement <8 x double> poison, double %y, i32 0
@@ -1065,8 +1065,8 @@ define void @fcmp_ugt_vf_v64f16(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 64
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfle.vf v16, v8, fa0
-; ZVFH-NEXT:    vmnot.m v8, v16
+; ZVFH-NEXT:    vmfle.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnot.m v8, v8
 ; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
   %a = load <64 x half>, ptr %x
@@ -1083,8 +1083,8 @@ define void @fcmp_ugt_vf_v64f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 64
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfgt.vf v16, v8, fa0
-; ZVFH-NEXT:    vsm.v v16, (a1)
+; ZVFH-NEXT:    vmfgt.vf v8, v8, fa0
+; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
   %a = load <64 x half>, ptr %x
   %b = insertelement <64 x half> poison, half %y, i32 0
@@ -1101,8 +1101,8 @@ define void @fcmp_ueq_vf_v32f32(ptr %x, float %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v8, v17, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v8, v8, v16
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
@@ -1119,8 +1119,8 @@ define void @fcmp_ueq_vf_v32f32_nonans(ptr %x, float %y, ptr %z) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfeq.vf v16, v8, fa0
-; CHECK-NEXT:    vsm.v v16, (a1)
+; CHECK-NEXT:    vmfeq.vf v8, v8, fa0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
   %b = insertelement <32 x float> poison, float %y, i32 0
@@ -1136,8 +1136,8 @@ define void @fcmp_one_vf_v8f64(ptr %x, double %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v8, v17, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v8, v8, v16
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
@@ -1153,8 +1153,8 @@ define void @fcmp_one_vf_v8f64_nonans(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfne.vf v16, v8, fa0
-; CHECK-NEXT:    vsm.v v16, (a1)
+; CHECK-NEXT:    vmfne.vf v8, v8, fa0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
   %b = insertelement <16 x double> poison, double %y, i32 0
@@ -1284,14 +1284,14 @@ define void @fcmp_oeq_fv_v8f16(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-LABEL: fcmp_oeq_fv_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v10, v8
-; ZVFHMIN-NEXT:    vsm.v v12, (a1)
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v8
+; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -1313,14 +1313,14 @@ define void @fcmp_oeq_fv_v8f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN-LABEL: fcmp_oeq_fv_v8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v10, v8
-; ZVFHMIN-NEXT:    vsm.v v12, (a1)
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v8
+; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -1435,21 +1435,21 @@ define void @fcmp_olt_fv_v16f16(ptr %x, half %y, ptr %z) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0
-; ZVFH-NEXT:    vsm.v v10, (a1)
+; ZVFH-NEXT:    vmfgt.vf v8, v8, fa0
+; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_fv_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
+; ZVFHMIN-NEXT:    vle16.v v10, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vmv.v.x v14, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v16, v12, v8
-; ZVFHMIN-NEXT:    vsm.v v16, (a1)
+; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v8
+; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = insertelement <16 x half> poison, half %y, i32 0
@@ -1464,21 +1464,21 @@ define void @fcmp_olt_fv_v16f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0
-; ZVFH-NEXT:    vsm.v v10, (a1)
+; ZVFH-NEXT:    vmfgt.vf v8, v8, fa0
+; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_fv_v16f16_nonans:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
+; ZVFHMIN-NEXT:    vle16.v v10, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vmv.v.x v14, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v16, v12, v8
-; ZVFHMIN-NEXT:    vsm.v v16, (a1)
+; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v8
+; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = insertelement <16 x half> poison, half %y, i32 0
@@ -1493,8 +1493,8 @@ define void @fcmp_oge_fv_v8f32(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vsm.v v10, (a1)
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> poison, float %y, i32 0
@@ -1509,8 +1509,8 @@ define void @fcmp_oge_fv_v8f32_nonans(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vsm.v v10, (a1)
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> poison, float %y, i32 0
@@ -1576,8 +1576,8 @@ define void @fcmp_ule_fv_v32f16(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 32
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmflt.vf v12, v8, fa0
-; ZVFH-NEXT:    vmnot.m v8, v12
+; ZVFH-NEXT:    vmflt.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnot.m v8, v8
 ; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
 ;
@@ -1585,14 +1585,14 @@ define void @fcmp_ule_fv_v32f16(ptr %x, half %y, ptr %z) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    li a2, 32
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v16, (a0)
+; ZVFHMIN-NEXT:    vle16.v v12, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v24, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vmv.v.x v20, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v24, v8, v16
-; ZVFHMIN-NEXT:    vmnot.m v8, v24
+; ZVFHMIN-NEXT:    vmflt.vv v8, v8, v16
+; ZVFHMIN-NEXT:    vmnot.m v8, v8
 ; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <32 x half>, ptr %x
@@ -1609,22 +1609,22 @@ define void @fcmp_ule_fv_v32f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 32
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfge.vf v12, v8, fa0
-; ZVFH-NEXT:    vsm.v v12, (a1)
+; ZVFH-NEXT:    vmfge.vf v8, v8, fa0
+; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_fv_v32f16_nonans:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    li a2, 32
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v16, (a0)
+; ZVFHMIN-NEXT:    vle16.v v12, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v24, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vmv.v.x v20, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v24, v16, v8
-; ZVFHMIN-NEXT:    vsm.v v24, (a1)
+; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v8
+; ZVFHMIN-NEXT:    vsm.v v8, (a1)
 ; ZVFHMIN-NEXT:    ret
   %a = load <32 x half>, ptr %x
   %b = insertelement <32 x half> poison, half %y, i32 0
@@ -1639,8 +1639,8 @@ define void @fcmp_uge_fv_v16f32(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v8, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
@@ -1656,8 +1656,8 @@ define void @fcmp_uge_fv_v16f32_nonans(ptr %x, float %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
   %b = insertelement <16 x float> poison, float %y, i32 0
@@ -1672,8 +1672,8 @@ define void @fcmp_ult_fv_v8f64(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v8, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
@@ -1689,8 +1689,8 @@ define void @fcmp_ult_fv_v8f64_nonans(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
   %b = insertelement <8 x double> poison, double %y, i32 0
@@ -1706,8 +1706,8 @@ define void @fcmp_ugt_fv_v64f16(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 64
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmfge.vf v16, v8, fa0
-; ZVFH-NEXT:    vmnot.m v8, v16
+; ZVFH-NEXT:    vmfge.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnot.m v8, v8
 ; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
   %a = load <64 x half>, ptr %x
@@ -1724,8 +1724,8 @@ define void @fcmp_ugt_fv_v64f16_nonans(ptr %x, half %y, ptr %z) {
 ; ZVFH-NEXT:    li a2, 64
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
 ; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vmflt.vf v16, v8, fa0
-; ZVFH-NEXT:    vsm.v v16, (a1)
+; ZVFH-NEXT:    vmflt.vf v8, v8, fa0
+; ZVFH-NEXT:    vsm.v v8, (a1)
 ; ZVFH-NEXT:    ret
   %a = load <64 x half>, ptr %x
   %b = insertelement <64 x half> poison, half %y, i32 0
@@ -1742,8 +1742,8 @@ define void @fcmp_ueq_fv_v32f32(ptr %x, float %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v8, v17, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v8, v8, v16
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
@@ -1760,8 +1760,8 @@ define void @fcmp_ueq_fv_v32f32_nonans(ptr %x, float %y, ptr %z) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmfeq.vf v16, v8, fa0
-; CHECK-NEXT:    vsm.v v16, (a1)
+; CHECK-NEXT:    vmfeq.vf v8, v8, fa0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
   %b = insertelement <32 x float> poison, float %y, i32 0
@@ -1777,8 +1777,8 @@ define void @fcmp_one_fv_v8f64(ptr %x, double %y, ptr %z) {
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v8, v17, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v8, v8, v16
 ; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
@@ -1794,8 +1794,8 @@ define void @fcmp_one_fv_v8f64_nonans(ptr %x, double %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmfne.vf v16, v8, fa0
-; CHECK-NEXT:    vsm.v v16, (a1)
+; CHECK-NEXT:    vmfne.vf v8, v8, fa0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
   %b = insertelement <16 x double> poison, double %y, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 38df622998bf9..023d9e1d45936 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -9,15 +9,15 @@ define void @fadd_v8bf16(ptr %x, ptr %y) {
 ; CHECK-LABEL: fadd_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfadd.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = load <8 x bfloat>, ptr %y
@@ -30,15 +30,15 @@ define void @fadd_v6bf16(ptr %x, ptr %y) {
 ; CHECK-LABEL: fadd_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfadd.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = load <6 x bfloat>, ptr %y
@@ -60,15 +60,15 @@ define void @fadd_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-LABEL: fadd_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -90,15 +90,15 @@ define void @fadd_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-LABEL: fadd_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
@@ -143,15 +143,15 @@ define void @fsub_v8bf16(ptr %x, ptr %y) {
 ; CHECK-LABEL: fsub_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfsub.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = load <8 x bfloat>, ptr %y
@@ -164,15 +164,15 @@ define void @fsub_v6bf16(ptr %x, ptr %y) {
 ; CHECK-LABEL: fsub_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfsub.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = load <6 x bfloat>, ptr %y
@@ -194,15 +194,15 @@ define void @fsub_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-LABEL: fsub_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -224,15 +224,15 @@ define void @fsub_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-LABEL: fsub_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
@@ -277,15 +277,15 @@ define void @fmul_v8bf16(ptr %x, ptr %y) {
 ; CHECK-LABEL: fmul_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmul.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = load <8 x bfloat>, ptr %y
@@ -298,15 +298,15 @@ define void @fmul_v6bf16(ptr %x, ptr %y) {
 ; CHECK-LABEL: fmul_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmul.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = load <6 x bfloat>, ptr %y
@@ -328,15 +328,15 @@ define void @fmul_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-LABEL: fmul_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -358,15 +358,15 @@ define void @fmul_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-LABEL: fmul_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
@@ -411,15 +411,15 @@ define void @fdiv_v8bf16(ptr %x, ptr %y) {
 ; CHECK-LABEL: fdiv_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfdiv.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = load <8 x bfloat>, ptr %y
@@ -432,15 +432,15 @@ define void @fdiv_v6bf16(ptr %x, ptr %y) {
 ; CHECK-LABEL: fdiv_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfdiv.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = load <6 x bfloat>, ptr %y
@@ -462,15 +462,15 @@ define void @fdiv_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-LABEL: fdiv_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -492,15 +492,15 @@ define void @fdiv_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-LABEL: fdiv_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
@@ -1168,8 +1168,8 @@ define void @copysign_neg_trunc_v4bf16_v4f32(ptr %x, ptr %y) {
 ; CHECK-NEXT:    lui a1, 8
 ; CHECK-NEXT:    addi a2, a1, -1
 ; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v9
-; CHECK-NEXT:    vxor.vx v9, v10, a1
+; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v9
+; CHECK-NEXT:    vxor.vx v9, v9, a1
 ; CHECK-NEXT:    vand.vx v9, v9, a1
 ; CHECK-NEXT:    vor.vv v8, v8, v9
 ; CHECK-NEXT:    vse16.v v8, (a0)
@@ -1192,8 +1192,8 @@ define void @copysign_neg_trunc_v3bf16_v3f32(ptr %x, ptr %y) {
 ; CHECK-NEXT:    lui a1, 8
 ; CHECK-NEXT:    addi a2, a1, -1
 ; CHECK-NEXT:    vand.vx v8, v8, a2
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v9
-; CHECK-NEXT:    vxor.vx v9, v10, a1
+; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v9
+; CHECK-NEXT:    vxor.vx v9, v9, a1
 ; CHECK-NEXT:    vand.vx v9, v9, a1
 ; CHECK-NEXT:    vor.vv v8, v8, v9
 ; CHECK-NEXT:    vse16.v v8, (a0)
@@ -1213,8 +1213,8 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vle32.v v8, (a1)
 ; ZVFH-NEXT:    vle16.v v9, (a0)
-; ZVFH-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFH-NEXT:    vfsgnjn.vv v8, v9, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFH-NEXT:    vfsgnjn.vv v8, v9, v8
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
@@ -1226,8 +1226,8 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    addi a2, a1, -1
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a2
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vxor.vx v9, v10, a1
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
@@ -1247,8 +1247,8 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
 ; ZVFH-NEXT:    vle32.v v8, (a1)
 ; ZVFH-NEXT:    vle16.v v9, (a0)
-; ZVFH-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFH-NEXT:    vfsgnjn.vv v8, v9, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFH-NEXT:    vfsgnjn.vv v8, v9, v8
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
@@ -1260,8 +1260,8 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    addi a2, a1, -1
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a2
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vxor.vx v9, v10, a1
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
 ; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
@@ -1299,13 +1299,13 @@ define void @sqrt_v8bf16(ptr %x) {
 ; CHECK-LABEL: sqrt_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %a)
@@ -1317,13 +1317,13 @@ define void @sqrt_v6bf16(ptr %x) {
 ; CHECK-LABEL: sqrt_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = call <6 x bfloat> @llvm.sqrt.v6bf16(<6 x bfloat> %a)
@@ -1343,13 +1343,13 @@ define void @sqrt_v8f16(ptr %x) {
 ; ZVFHMIN-LABEL: sqrt_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsqrt.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
@@ -1369,13 +1369,13 @@ define void @sqrt_v6f16(ptr %x) {
 ; ZVFHMIN-LABEL: sqrt_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsqrt.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a)
@@ -1415,12 +1415,12 @@ define void @fma_v8bf16(ptr %x, ptr %y, ptr %z) {
 ; CHECK-LABEL: fma_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a2)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
+; CHECK-NEXT:    vle16.v v9, (a2)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vle16.v v13, (a1)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v12, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -1439,12 +1439,12 @@ define void @fma_v6bf16(ptr %x, ptr %y, ptr %z) {
 ; CHECK-LABEL: fma_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a2)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
+; CHECK-NEXT:    vle16.v v9, (a2)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vle16.v v13, (a1)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v12, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -1473,12 +1473,12 @@ define void @fma_v8f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-LABEL: fma_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a2)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vle16.v v14, (a1)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vle16.v v9, (a2)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vle16.v v13, (a1)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -1507,12 +1507,12 @@ define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-LABEL: fma_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a2)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vle16.v v14, (a1)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vle16.v v9, (a2)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vle16.v v13, (a1)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -1568,13 +1568,13 @@ define void @fmsub_v8bf16(ptr %x, ptr %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v13, (a1)
 ; CHECK-NEXT:    lui a1, 8
-; CHECK-NEXT:    vxor.vx v12, v8, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
+; CHECK-NEXT:    vxor.vx v11, v8, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v12, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -1595,13 +1595,13 @@ define void @fmsub_v6bf16(ptr %x, ptr %y, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v13, (a1)
 ; CHECK-NEXT:    lui a1, 8
-; CHECK-NEXT:    vxor.vx v12, v8, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
+; CHECK-NEXT:    vxor.vx v11, v8, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v12, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -1632,13 +1632,13 @@ define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a2)
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
-; ZVFHMIN-NEXT:    vle16.v v14, (a1)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-NEXT:    vle16.v v13, (a1)
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v12, v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vxor.vx v11, v8, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -1669,13 +1669,13 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a2)
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
-; ZVFHMIN-NEXT:    vle16.v v14, (a1)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-NEXT:    vle16.v v13, (a1)
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v12, v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vxor.vx v11, v8, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -1734,15 +1734,15 @@ define void @fadd_v16bf16(ptr %x, ptr %y) {
 ; CHECK-LABEL: fadd_v16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a1)
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v16
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vle16.v v14, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfadd.vv v8, v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
-; CHECK-NEXT:    vse16.v v12, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <16 x bfloat>, ptr %x
   %b = load <16 x bfloat>, ptr %y
@@ -1764,15 +1764,15 @@ define void @fadd_v16f16(ptr %x, ptr %y) {
 ; ZVFHMIN-LABEL: fadd_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v12, (a1)
-; ZVFHMIN-NEXT:    vle16.v v16, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-NEXT:    vle16.v v14, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT:    vse16.v v12, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
@@ -1817,15 +1817,15 @@ define void @fsub_v16bf16(ptr %x, ptr %y) {
 ; CHECK-LABEL: fsub_v16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a1)
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v16
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vle16.v v14, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfsub.vv v8, v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
-; CHECK-NEXT:    vse16.v v12, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <16 x bfloat>, ptr %x
   %b = load <16 x bfloat>, ptr %y
@@ -1847,15 +1847,15 @@ define void @fsub_v16f16(ptr %x, ptr %y) {
 ; ZVFHMIN-LABEL: fsub_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v12, (a1)
-; ZVFHMIN-NEXT:    vle16.v v16, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-NEXT:    vle16.v v14, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT:    vse16.v v12, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
@@ -1900,15 +1900,15 @@ define void @fmul_v16bf16(ptr %x, ptr %y) {
 ; CHECK-LABEL: fmul_v16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a1)
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v16
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vle16.v v14, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmul.vv v8, v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
-; CHECK-NEXT:    vse16.v v12, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <16 x bfloat>, ptr %x
   %b = load <16 x bfloat>, ptr %y
@@ -1930,15 +1930,15 @@ define void @fmul_v16f16(ptr %x, ptr %y) {
 ; ZVFHMIN-LABEL: fmul_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v12, (a1)
-; ZVFHMIN-NEXT:    vle16.v v16, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-NEXT:    vle16.v v14, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT:    vse16.v v12, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
@@ -1983,15 +1983,15 @@ define void @fdiv_v16bf16(ptr %x, ptr %y) {
 ; CHECK-LABEL: fdiv_v16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a1)
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v16
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vle16.v v14, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfdiv.vv v8, v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
-; CHECK-NEXT:    vse16.v v12, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <16 x bfloat>, ptr %x
   %b = load <16 x bfloat>, ptr %y
@@ -2013,15 +2013,15 @@ define void @fdiv_v16f16(ptr %x, ptr %y) {
 ; ZVFHMIN-LABEL: fdiv_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v12, (a1)
-; ZVFHMIN-NEXT:    vle16.v v16, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-NEXT:    vle16.v v14, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT:    vse16.v v12, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
@@ -2132,12 +2132,12 @@ define void @fma_v16bf16(ptr %x, ptr %y, ptr %z) {
 ; CHECK-LABEL: fma_v16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a2)
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vle16.v v20, (a1)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v16
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v20
+; CHECK-NEXT:    vle16.v v10, (a2)
+; CHECK-NEXT:    vle16.v v14, (a0)
+; CHECK-NEXT:    vle16.v v18, (a1)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v18
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v16, v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -2166,12 +2166,12 @@ define void @fma_v16f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-LABEL: fma_v16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v12, (a2)
-; ZVFHMIN-NEXT:    vle16.v v16, (a0)
-; ZVFHMIN-NEXT:    vle16.v v20, (a1)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vle16.v v10, (a2)
+; ZVFHMIN-NEXT:    vle16.v v14, (a0)
+; ZVFHMIN-NEXT:    vle16.v v18, (a1)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -2226,16 +2226,16 @@ define void @fadd_vf_v8bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fadd_vf_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfadd.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
@@ -2249,16 +2249,16 @@ define void @fadd_vf_v6bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fadd_vf_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfadd.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
@@ -2280,16 +2280,16 @@ define void @fadd_vf_v8f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fadd_vf_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -2311,16 +2311,16 @@ define void @fadd_vf_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fadd_vf_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
@@ -2366,16 +2366,16 @@ define void @fadd_fv_v8bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fadd_fv_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfadd.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
@@ -2389,16 +2389,16 @@ define void @fadd_fv_v6bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fadd_fv_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfadd.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
@@ -2420,16 +2420,16 @@ define void @fadd_fv_v8f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fadd_fv_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -2451,16 +2451,16 @@ define void @fadd_fv_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fadd_fv_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
@@ -2506,16 +2506,16 @@ define void @fsub_vf_v8bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fsub_vf_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfsub.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
@@ -2529,16 +2529,16 @@ define void @fsub_vf_v6bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fsub_vf_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfsub.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
@@ -2560,16 +2560,16 @@ define void @fsub_vf_v8f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fsub_vf_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -2591,16 +2591,16 @@ define void @fsub_vf_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fsub_vf_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
@@ -2646,16 +2646,16 @@ define void @fsub_fv_v8bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fsub_fv_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfsub.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
@@ -2669,16 +2669,16 @@ define void @fsub_fv_v6bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fsub_fv_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfsub.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
@@ -2700,16 +2700,16 @@ define void @fsub_fv_v8f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fsub_fv_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -2731,16 +2731,16 @@ define void @fsub_fv_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fsub_fv_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
@@ -2786,16 +2786,16 @@ define void @fmul_vf_v8bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fmul_vf_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmul.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
@@ -2809,16 +2809,16 @@ define void @fmul_vf_v6bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fmul_vf_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmul.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
@@ -2840,16 +2840,16 @@ define void @fmul_vf_v8f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fmul_vf_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -2871,16 +2871,16 @@ define void @fmul_vf_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fmul_vf_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
@@ -2926,16 +2926,16 @@ define void @fmul_fv_v8bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fmul_fv_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmul.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
@@ -2949,16 +2949,16 @@ define void @fmul_fv_v6bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fmul_fv_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmul.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
@@ -2980,16 +2980,16 @@ define void @fmul_fv_v8f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fmul_fv_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -3011,16 +3011,16 @@ define void @fmul_fv_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fmul_fv_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
@@ -3066,16 +3066,16 @@ define void @fdiv_vf_v8bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fdiv_vf_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfdiv.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
@@ -3089,16 +3089,16 @@ define void @fdiv_vf_v6bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fdiv_vf_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfdiv.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
@@ -3120,16 +3120,16 @@ define void @fdiv_vf_v8f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fdiv_vf_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -3151,16 +3151,16 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fdiv_vf_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
@@ -3206,16 +3206,16 @@ define void @fdiv_fv_v8bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fdiv_fv_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfdiv.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
@@ -3229,16 +3229,16 @@ define void @fdiv_fv_v6bf16(ptr %x, bfloat %y) {
 ; CHECK-LABEL: fdiv_fv_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v12, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vmv.v.x v11, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfdiv.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
@@ -3260,16 +3260,16 @@ define void @fdiv_fv_v8f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fdiv_fv_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -3291,16 +3291,16 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-LABEL: fdiv_fv_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vmv.v.x v11, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
@@ -3346,13 +3346,13 @@ define void @fma_vf_v8bf16(ptr %x, ptr %y, bfloat %z) {
 ; CHECK-LABEL: fma_vf_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v14, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
+; CHECK-NEXT:    vmv.v.x v13, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v12, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -3372,13 +3372,13 @@ define void @fma_vf_v6bf16(ptr %x, ptr %y, bfloat %z) {
 ; CHECK-LABEL: fma_vf_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v14, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
+; CHECK-NEXT:    vmv.v.x v13, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v12, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -3407,13 +3407,13 @@ define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) {
 ; ZVFHMIN-LABEL: fma_vf_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -3442,13 +3442,13 @@ define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
 ; ZVFHMIN-LABEL: fma_vf_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -3504,13 +3504,13 @@ define void @fma_fv_v8bf16(ptr %x, ptr %y, bfloat %z) {
 ; CHECK-LABEL: fma_fv_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v14, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
+; CHECK-NEXT:    vmv.v.x v13, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v12, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -3530,13 +3530,13 @@ define void @fma_fv_v6bf16(ptr %x, ptr %y, bfloat %z) {
 ; CHECK-LABEL: fma_fv_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vmv.v.x v14, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
+; CHECK-NEXT:    vmv.v.x v13, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v12, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -3565,13 +3565,13 @@ define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) {
 ; ZVFHMIN-LABEL: fma_fv_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -3600,13 +3600,13 @@ define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
 ; ZVFHMIN-LABEL: fma_fv_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -3664,13 +3664,13 @@ define void @fmsub_vf_v8bf16(ptr %x, ptr %y, bfloat %z) {
 ; CHECK-NEXT:    fmv.x.w a2, fa0
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 8
-; CHECK-NEXT:    vmv.v.x v14, a2
-; CHECK-NEXT:    vxor.vx v12, v8, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
+; CHECK-NEXT:    vmv.v.x v13, a2
+; CHECK-NEXT:    vxor.vx v11, v8, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v12, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -3693,13 +3693,13 @@ define void @fmsub_vf_v6bf16(ptr %x, ptr %y, bfloat %z) {
 ; CHECK-NEXT:    fmv.x.w a2, fa0
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 8
-; CHECK-NEXT:    vmv.v.x v14, a2
-; CHECK-NEXT:    vxor.vx v12, v8, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v14
+; CHECK-NEXT:    vmv.v.x v13, a2
+; CHECK-NEXT:    vxor.vx v11, v8, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v12, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -3731,13 +3731,13 @@ define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
 ; ZVFHMIN-NEXT:    fmv.x.w a2, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vmv.v.x v14, a2
-; ZVFHMIN-NEXT:    vxor.vx v12, v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vmv.v.x v13, a2
+; ZVFHMIN-NEXT:    vxor.vx v11, v8, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -3769,13 +3769,13 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
 ; ZVFHMIN-NEXT:    fmv.x.w a2, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vmv.v.x v14, a2
-; ZVFHMIN-NEXT:    vxor.vx v12, v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vmv.v.x v13, a2
+; ZVFHMIN-NEXT:    vxor.vx v11, v8, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -3874,10 +3874,10 @@ define void @trunc_v8bf16(ptr %x) {
 ; CHECK-LABEL: trunc_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
@@ -3886,8 +3886,8 @@ define void @trunc_v8bf16(ptr %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = call <8 x bfloat> @llvm.trunc.v8bf16(<8 x bfloat> %a)
@@ -3899,10 +3899,10 @@ define void @trunc_v6bf16(ptr %x) {
 ; CHECK-LABEL: trunc_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
@@ -3911,8 +3911,8 @@ define void @trunc_v6bf16(ptr %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = call <6 x bfloat> @llvm.trunc.v6bf16(<6 x bfloat> %a)
@@ -3939,10 +3939,10 @@ define void @trunc_v8f16(ptr %x) {
 ; ZVFHMIN-LABEL: trunc_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v10, v8
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v10, fa5
@@ -3951,8 +3951,8 @@ define void @trunc_v8f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
@@ -3979,10 +3979,10 @@ define void @trunc_v6f16(ptr %x) {
 ; ZVFHMIN-LABEL: trunc_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v10, v8
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v10, fa5
@@ -3991,8 +3991,8 @@ define void @trunc_v6f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
@@ -4046,10 +4046,10 @@ define void @ceil_v8bf16(ptr %x) {
 ; CHECK-LABEL: ceil_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
@@ -4060,8 +4060,8 @@ define void @ceil_v8bf16(ptr %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = call <8 x bfloat> @llvm.ceil.v8bf16(<8 x bfloat> %a)
@@ -4073,10 +4073,10 @@ define void @ceil_v6bf16(ptr %x) {
 ; CHECK-LABEL: ceil_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
@@ -4087,8 +4087,8 @@ define void @ceil_v6bf16(ptr %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = call <6 x bfloat> @llvm.ceil.v6bf16(<6 x bfloat> %a)
@@ -4117,10 +4117,10 @@ define void @ceil_v8f16(ptr %x) {
 ; ZVFHMIN-LABEL: ceil_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v10, v8
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v10, fa5
@@ -4131,8 +4131,8 @@ define void @ceil_v8f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
@@ -4161,10 +4161,10 @@ define void @ceil_v6f16(ptr %x) {
 ; ZVFHMIN-LABEL: ceil_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v10, v8
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v10, fa5
@@ -4175,8 +4175,8 @@ define void @ceil_v6f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
@@ -4234,10 +4234,10 @@ define void @floor_v8bf16(ptr %x) {
 ; CHECK-LABEL: floor_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
@@ -4248,8 +4248,8 @@ define void @floor_v8bf16(ptr %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = call <8 x bfloat> @llvm.floor.v8bf16(<8 x bfloat> %a)
@@ -4261,10 +4261,10 @@ define void @floor_v6bf16(ptr %x) {
 ; CHECK-LABEL: floor_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
@@ -4275,8 +4275,8 @@ define void @floor_v6bf16(ptr %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = call <6 x bfloat> @llvm.floor.v6bf16(<6 x bfloat> %a)
@@ -4305,10 +4305,10 @@ define void @floor_v8f16(ptr %x) {
 ; ZVFHMIN-LABEL: floor_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v10, v8
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v10, fa5
@@ -4319,8 +4319,8 @@ define void @floor_v8f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
@@ -4349,10 +4349,10 @@ define void @floor_v6f16(ptr %x) {
 ; ZVFHMIN-LABEL: floor_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v10, v8
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v10, fa5
@@ -4363,8 +4363,8 @@ define void @floor_v6f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
@@ -4422,10 +4422,10 @@ define void @round_v8bf16(ptr %x) {
 ; CHECK-LABEL: round_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
@@ -4436,8 +4436,8 @@ define void @round_v8bf16(ptr %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = call <8 x bfloat> @llvm.round.v8bf16(<8 x bfloat> %a)
@@ -4449,10 +4449,10 @@ define void @round_v6bf16(ptr %x) {
 ; CHECK-LABEL: round_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
@@ -4463,8 +4463,8 @@ define void @round_v6bf16(ptr %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = call <6 x bfloat> @llvm.round.v6bf16(<6 x bfloat> %a)
@@ -4493,10 +4493,10 @@ define void @round_v8f16(ptr %x) {
 ; ZVFHMIN-LABEL: round_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v10, v8
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v10, fa5
@@ -4507,8 +4507,8 @@ define void @round_v8f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = call <8 x half> @llvm.round.v8f16(<8 x half> %a)
@@ -4537,10 +4537,10 @@ define void @round_v6f16(ptr %x) {
 ; ZVFHMIN-LABEL: round_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v10, v8
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v10, fa5
@@ -4551,8 +4551,8 @@ define void @round_v6f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
@@ -4610,10 +4610,10 @@ define void @rint_v8bf16(ptr %x) {
 ; CHECK-LABEL: rint_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
@@ -4622,8 +4622,8 @@ define void @rint_v8bf16(ptr %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = call <8 x bfloat> @llvm.rint.v8bf16(<8 x bfloat> %a)
@@ -4650,10 +4650,10 @@ define void @rint_v8f16(ptr %x) {
 ; ZVFHMIN-LABEL: rint_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v10, v8
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v10, fa5
@@ -4662,8 +4662,8 @@ define void @rint_v8f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
@@ -4717,10 +4717,10 @@ define void @nearbyint_v8bf16(ptr %x) {
 ; CHECK-LABEL: nearbyint_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, fa5
@@ -4731,8 +4731,8 @@ define void @nearbyint_v8bf16(ptr %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = call <8 x bfloat> @llvm.nearbyint.v8bf16(<8 x bfloat> %a)
@@ -4761,10 +4761,10 @@ define void @nearbyint_v8f16(ptr %x) {
 ; ZVFHMIN-LABEL: nearbyint_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 307200
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v10, v8
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v10, fa5
@@ -4775,8 +4775,8 @@ define void @nearbyint_v8f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
@@ -4834,22 +4834,22 @@ define void @fmuladd_v8bf16(ptr %x, ptr %y, ptr %z) {
 ; CHECK-LABEL: fmuladd_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v13, (a2)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vle16.v v12, (a2)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmul.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v13
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfadd.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = load <8 x bfloat>, ptr %y
@@ -4863,22 +4863,22 @@ define void @fmuladd_v6bf16(ptr %x, ptr %y, ptr %z) {
 ; CHECK-LABEL: fmuladd_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v13, (a2)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vle16.v v12, (a2)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmul.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v13
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfadd.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = load <6 x bfloat>, ptr %y
@@ -4902,22 +4902,22 @@ define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-LABEL: fmuladd_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vle16.v v13, (a2)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vle16.v v12, (a2)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v13
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -4941,22 +4941,22 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-LABEL: fmuladd_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vle16.v v13, (a2)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vle16.v v12, (a2)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v13
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
@@ -5006,22 +5006,22 @@ define void @fmsub_fmuladd_v8bf16(ptr %x, ptr %y, ptr %z) {
 ; CHECK-LABEL: fmsub_fmuladd_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v13, (a2)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vle16.v v12, (a2)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmul.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v13
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfsub.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x bfloat>, ptr %x
   %b = load <8 x bfloat>, ptr %y
@@ -5036,22 +5036,22 @@ define void @fmsub_fmuladd_v6bf16(ptr %x, ptr %y, ptr %z) {
 ; CHECK-LABEL: fmsub_fmuladd_v6bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v13, (a2)
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vle16.v v12, (a2)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmul.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v13
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfsub.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x bfloat>, ptr %x
   %b = load <6 x bfloat>, ptr %y
@@ -5076,22 +5076,22 @@ define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-LABEL: fmsub_fmuladd_v8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vle16.v v13, (a2)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vle16.v v12, (a2)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v13
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -5116,22 +5116,22 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-LABEL: fmsub_fmuladd_v6f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vle16.v v12, (a0)
-; ZVFHMIN-NEXT:    vle16.v v13, (a2)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v11, (a0)
+; ZVFHMIN-NEXT:    vle16.v v12, (a2)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v13
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index 1f9a8bf8133c8..62e4c2bda64db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -113,9 +113,9 @@ define void @fp2si_v8f32_v8i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: fp2si_v8f32_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vmfne.vv v0, v12, v12
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vmfne.vv v0, v10, v10
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    vse64.v v8, (a1)
@@ -132,9 +132,9 @@ define void @fp2ui_v8f32_v8i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: fp2ui_v8f32_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vmfne.vv v0, v12, v12
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vmfne.vv v0, v10, v10
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    vse64.v v8, (a1)
@@ -193,9 +193,9 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v9, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
@@ -215,9 +215,9 @@ define void @fp2ui_v2f64_v2i8(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v9, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
@@ -238,9 +238,9 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v12, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
@@ -261,9 +261,9 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
@@ -283,8 +283,8 @@ define void @fp2si_v2f64_v2i32(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmerge.vim v8, v9, 0, v0
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    vse32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x double>, ptr %x
@@ -301,8 +301,8 @@ define void @fp2ui_v2f64_v2i32(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmerge.vim v8, v9, 0, v0
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    vse32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x double>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index 9be93d5209121..2dc8365ce152e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -36,8 +36,8 @@ define <2 x i1> @fp2si_v2f32_v2i1(<2 x float> %x) {
 ; CHECK-LABEL: fp2si_v2f32_v2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %z = fptosi <2 x float> %x to <2 x i1>
@@ -48,8 +48,7 @@ define <2 x i15> @fp2si_v2f32_v2i15(<2 x float> %x) {
 ; CHECK-LABEL: fp2si_v2f32_v2i15:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %z = fptosi <2 x float> %x to <2 x i15>
   ret <2 x i15> %z
@@ -59,8 +58,7 @@ define <2 x i15> @fp2ui_v2f32_v2i15(<2 x float> %x) {
 ; CHECK-LABEL: fp2ui_v2f32_v2i15:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %z = fptoui <2 x float> %x to <2 x i15>
   ret <2 x i15> %z
@@ -70,8 +68,8 @@ define <2 x i1> @fp2ui_v2f32_v2i1(<2 x float> %x) {
 ; CHECK-LABEL: fp2ui_v2f32_v2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %z = fptoui <2 x float> %x to <2 x i1>
@@ -110,8 +108,8 @@ define <3 x i1> @fp2si_v3f32_v3i1(<3 x float> %x) {
 ; CHECK-LABEL: fp2si_v3f32_v3i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %z = fptosi <3 x float> %x to <3 x i1>
@@ -123,16 +121,16 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
 ; ZVFH32-LABEL: fp2si_v3f32_v3i15:
 ; ZVFH32:       # %bb.0:
 ; ZVFH32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFH32-NEXT:    vmv.x.s a1, v9
-; ZVFH32-NEXT:    vslidedown.vi v9, v9, 1
-; ZVFH32-NEXT:    vmv.x.s a2, v8
+; ZVFH32-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFH32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFH32-NEXT:    vmv.x.s a1, v8
+; ZVFH32-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFH32-NEXT:    vmv.x.s a2, v9
 ; ZVFH32-NEXT:    slli a1, a1, 17
 ; ZVFH32-NEXT:    srli a1, a1, 17
 ; ZVFH32-NEXT:    slli a3, a2, 30
 ; ZVFH32-NEXT:    or a1, a1, a3
-; ZVFH32-NEXT:    vmv.x.s a3, v9
+; ZVFH32-NEXT:    vmv.x.s a3, v8
 ; ZVFH32-NEXT:    slli a2, a2, 17
 ; ZVFH32-NEXT:    slli a3, a3, 17
 ; ZVFH32-NEXT:    srli a2, a2, 19
@@ -145,13 +143,13 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
 ; ZVFH64-LABEL: fp2si_v3f32_v3i15:
 ; ZVFH64:       # %bb.0:
 ; ZVFH64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH64-NEXT:    vmv.x.s a1, v9
-; ZVFH64-NEXT:    vslidedown.vi v8, v9, 1
-; ZVFH64-NEXT:    vslidedown.vi v9, v9, 2
+; ZVFH64-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFH64-NEXT:    vmv.x.s a1, v8
+; ZVFH64-NEXT:    vslidedown.vi v9, v8, 1
+; ZVFH64-NEXT:    vslidedown.vi v8, v8, 2
 ; ZVFH64-NEXT:    slli a1, a1, 49
-; ZVFH64-NEXT:    vmv.x.s a2, v8
-; ZVFH64-NEXT:    vmv.x.s a3, v9
+; ZVFH64-NEXT:    vmv.x.s a2, v9
+; ZVFH64-NEXT:    vmv.x.s a3, v8
 ; ZVFH64-NEXT:    srli a1, a1, 49
 ; ZVFH64-NEXT:    slli a2, a2, 49
 ; ZVFH64-NEXT:    slli a3, a3, 30
@@ -167,16 +165,16 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
 ; ZVFHMIN32-LABEL: fp2si_v3f32_v3i15:
 ; ZVFHMIN32:       # %bb.0:
 ; ZVFHMIN32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN32-NEXT:    vmv.x.s a1, v9
-; ZVFHMIN32-NEXT:    vslidedown.vi v9, v9, 1
-; ZVFHMIN32-NEXT:    vmv.x.s a2, v8
+; ZVFHMIN32-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFHMIN32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN32-NEXT:    vmv.x.s a1, v8
+; ZVFHMIN32-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFHMIN32-NEXT:    vmv.x.s a2, v9
 ; ZVFHMIN32-NEXT:    slli a1, a1, 17
 ; ZVFHMIN32-NEXT:    srli a1, a1, 17
 ; ZVFHMIN32-NEXT:    slli a3, a2, 30
 ; ZVFHMIN32-NEXT:    or a1, a1, a3
-; ZVFHMIN32-NEXT:    vmv.x.s a3, v9
+; ZVFHMIN32-NEXT:    vmv.x.s a3, v8
 ; ZVFHMIN32-NEXT:    slli a2, a2, 17
 ; ZVFHMIN32-NEXT:    slli a3, a3, 17
 ; ZVFHMIN32-NEXT:    srli a2, a2, 19
@@ -189,13 +187,13 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
 ; ZVFHMIN64-LABEL: fp2si_v3f32_v3i15:
 ; ZVFHMIN64:       # %bb.0:
 ; ZVFHMIN64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN64-NEXT:    vmv.x.s a1, v9
-; ZVFHMIN64-NEXT:    vslidedown.vi v8, v9, 1
-; ZVFHMIN64-NEXT:    vslidedown.vi v9, v9, 2
+; ZVFHMIN64-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFHMIN64-NEXT:    vmv.x.s a1, v8
+; ZVFHMIN64-NEXT:    vslidedown.vi v9, v8, 1
+; ZVFHMIN64-NEXT:    vslidedown.vi v8, v8, 2
 ; ZVFHMIN64-NEXT:    slli a1, a1, 49
-; ZVFHMIN64-NEXT:    vmv.x.s a2, v8
-; ZVFHMIN64-NEXT:    vmv.x.s a3, v9
+; ZVFHMIN64-NEXT:    vmv.x.s a2, v9
+; ZVFHMIN64-NEXT:    vmv.x.s a3, v8
 ; ZVFHMIN64-NEXT:    srli a1, a1, 49
 ; ZVFHMIN64-NEXT:    slli a2, a2, 49
 ; ZVFHMIN64-NEXT:    slli a3, a3, 30
@@ -216,16 +214,16 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
 ; ZVFH32-LABEL: fp2ui_v3f32_v3i15:
 ; ZVFH32:       # %bb.0:
 ; ZVFH32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFH32-NEXT:    vmv.x.s a1, v9
-; ZVFH32-NEXT:    vslidedown.vi v9, v9, 1
-; ZVFH32-NEXT:    vmv.x.s a2, v8
+; ZVFH32-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFH32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFH32-NEXT:    vmv.x.s a1, v8
+; ZVFH32-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFH32-NEXT:    vmv.x.s a2, v9
 ; ZVFH32-NEXT:    slli a1, a1, 16
 ; ZVFH32-NEXT:    srli a1, a1, 16
 ; ZVFH32-NEXT:    slli a3, a2, 30
 ; ZVFH32-NEXT:    or a1, a1, a3
-; ZVFH32-NEXT:    vmv.x.s a3, v9
+; ZVFH32-NEXT:    vmv.x.s a3, v8
 ; ZVFH32-NEXT:    slli a2, a2, 17
 ; ZVFH32-NEXT:    slli a3, a3, 16
 ; ZVFH32-NEXT:    srli a2, a2, 19
@@ -238,13 +236,13 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
 ; ZVFH64-LABEL: fp2ui_v3f32_v3i15:
 ; ZVFH64:       # %bb.0:
 ; ZVFH64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH64-NEXT:    vmv.x.s a1, v9
-; ZVFH64-NEXT:    vslidedown.vi v8, v9, 1
-; ZVFH64-NEXT:    vslidedown.vi v9, v9, 2
+; ZVFH64-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFH64-NEXT:    vmv.x.s a1, v8
+; ZVFH64-NEXT:    vslidedown.vi v9, v8, 1
+; ZVFH64-NEXT:    vslidedown.vi v8, v8, 2
 ; ZVFH64-NEXT:    slli a1, a1, 48
-; ZVFH64-NEXT:    vmv.x.s a2, v8
-; ZVFH64-NEXT:    vmv.x.s a3, v9
+; ZVFH64-NEXT:    vmv.x.s a2, v9
+; ZVFH64-NEXT:    vmv.x.s a3, v8
 ; ZVFH64-NEXT:    srli a1, a1, 48
 ; ZVFH64-NEXT:    slli a2, a2, 48
 ; ZVFH64-NEXT:    slli a3, a3, 30
@@ -260,16 +258,16 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
 ; ZVFHMIN32-LABEL: fp2ui_v3f32_v3i15:
 ; ZVFHMIN32:       # %bb.0:
 ; ZVFHMIN32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN32-NEXT:    vmv.x.s a1, v9
-; ZVFHMIN32-NEXT:    vslidedown.vi v9, v9, 1
-; ZVFHMIN32-NEXT:    vmv.x.s a2, v8
+; ZVFHMIN32-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFHMIN32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN32-NEXT:    vmv.x.s a1, v8
+; ZVFHMIN32-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFHMIN32-NEXT:    vmv.x.s a2, v9
 ; ZVFHMIN32-NEXT:    slli a1, a1, 16
 ; ZVFHMIN32-NEXT:    srli a1, a1, 16
 ; ZVFHMIN32-NEXT:    slli a3, a2, 30
 ; ZVFHMIN32-NEXT:    or a1, a1, a3
-; ZVFHMIN32-NEXT:    vmv.x.s a3, v9
+; ZVFHMIN32-NEXT:    vmv.x.s a3, v8
 ; ZVFHMIN32-NEXT:    slli a2, a2, 17
 ; ZVFHMIN32-NEXT:    slli a3, a3, 16
 ; ZVFHMIN32-NEXT:    srli a2, a2, 19
@@ -282,13 +280,13 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
 ; ZVFHMIN64-LABEL: fp2ui_v3f32_v3i15:
 ; ZVFHMIN64:       # %bb.0:
 ; ZVFHMIN64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN64-NEXT:    vmv.x.s a1, v9
-; ZVFHMIN64-NEXT:    vslidedown.vi v8, v9, 1
-; ZVFHMIN64-NEXT:    vslidedown.vi v9, v9, 2
+; ZVFHMIN64-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFHMIN64-NEXT:    vmv.x.s a1, v8
+; ZVFHMIN64-NEXT:    vslidedown.vi v9, v8, 1
+; ZVFHMIN64-NEXT:    vslidedown.vi v8, v8, 2
 ; ZVFHMIN64-NEXT:    slli a1, a1, 48
-; ZVFHMIN64-NEXT:    vmv.x.s a2, v8
-; ZVFHMIN64-NEXT:    vmv.x.s a3, v9
+; ZVFHMIN64-NEXT:    vmv.x.s a2, v9
+; ZVFHMIN64-NEXT:    vmv.x.s a3, v8
 ; ZVFHMIN64-NEXT:    srli a1, a1, 48
 ; ZVFHMIN64-NEXT:    slli a2, a2, 48
 ; ZVFHMIN64-NEXT:    slli a3, a3, 30
@@ -308,8 +306,8 @@ define <3 x i1> @fp2ui_v3f32_v3i1(<3 x float> %x) {
 ; CHECK-LABEL: fp2ui_v3f32_v3i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %z = fptoui <3 x float> %x to <3 x i1>
@@ -348,8 +346,8 @@ define <8 x i1> @fp2si_v8f32_v8i1(<8 x float> %x) {
 ; CHECK-LABEL: fp2si_v8f32_v8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %z = fptosi <8 x float> %x to <8 x i1>
@@ -360,8 +358,8 @@ define <8 x i1> @fp2ui_v8f32_v8i1(<8 x float> %x) {
 ; CHECK-LABEL: fp2ui_v8f32_v8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %z = fptoui <8 x float> %x to <8 x i1>
@@ -400,8 +398,8 @@ define void @fp2si_v8f32_v8i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: fp2si_v8f32_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    vse64.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
@@ -414,8 +412,8 @@ define void @fp2ui_v8f32_v8i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: fp2ui_v8f32_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    vse64.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
@@ -518,8 +516,8 @@ define <2 x i1> @fp2si_v2f16_v2i1(<2 x half> %x) {
 ; ZVFH-LABEL: fp2si_v2f16_v2i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH-NEXT:    vand.vi v8, v9, 1
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -539,8 +537,8 @@ define <2 x i1> @fp2ui_v2f16_v2i1(<2 x half> %x) {
 ; ZVFH-LABEL: fp2ui_v2f16_v2i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; ZVFH-NEXT:    vand.vi v8, v9, 1
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -561,9 +559,9 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -579,9 +577,9 @@ define void @fp2ui_v2f64_v2i8(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -596,8 +594,8 @@ define <2 x i1> @fp2si_v2f64_v2i1(<2 x double> %x) {
 ; CHECK-LABEL: fp2si_v2f64_v2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %z = fptosi <2 x double> %x to <2 x i1>
@@ -608,8 +606,8 @@ define <2 x i1> @fp2ui_v2f64_v2i1(<2 x double> %x) {
 ; CHECK-LABEL: fp2ui_v2f64_v2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %z = fptoui <2 x double> %x to <2 x i1>
@@ -621,9 +619,9 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -639,9 +637,9 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -656,8 +654,8 @@ define <8 x i1> @fp2si_v8f64_v8i1(<8 x double> %x) {
 ; CHECK-LABEL: fp2si_v8f64_v8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %z = fptosi <8 x double> %x to <8 x i1>
@@ -668,8 +666,8 @@ define <8 x i1> @fp2ui_v8f64_v8i1(<8 x double> %x) {
 ; CHECK-LABEL: fp2ui_v8f64_v8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %z = fptoui <8 x double> %x to <8 x i1>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
index c29f4edb8c6a3..a4c19d469c685 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
@@ -84,8 +84,8 @@ define <15 x double> @vfpext_v15f32_v15f64(<15 x float> %a, <15 x i1> %m, i32 ze
 ; CHECK-LABEL: vfpext_v15f32_v15f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.fpext.v15f64.v15f32(<15 x float> %a, <15 x i1> %m, i32 %vl)
   ret <15 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
index bc86be6f62fd1..d056e638431c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
@@ -76,9 +76,8 @@ define <4 x i1> @vfptosi_v4i1_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: vfptosi_v4i1_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
index cbc4c69669b51..2fb379243ba35 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
@@ -10,8 +10,7 @@ define <4 x i7> @vfptosi_v4i7_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %ev
 ; ZVFH-LABEL: vfptosi_v4i7_v4f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_v4i7_v4f16:
@@ -33,8 +32,7 @@ define <4 x i8> @vfptosi_v4i8_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %ev
 ; ZVFH-LABEL: vfptosi_v4i8_v4f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_v4i8_v4f16:
@@ -54,8 +52,7 @@ define <4 x i8> @vfptosi_v4i8_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vfptosi_v4i8_v4f16_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_v4i8_v4f16_unmasked:
@@ -155,17 +152,17 @@ define <4 x i64> @vfptosi_v4i64_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %
 ; ZVFH-LABEL: vfptosi_v4i64_v4f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
+; ZVFH-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFH-NEXT:    vfwcvt.rtz.x.f.v v8, v10, v0.t
+; ZVFH-NEXT:    vfwcvt.rtz.x.f.v v8, v9, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_v4i64_v4f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.rtz.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.rtz.x.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i64> %v
@@ -175,17 +172,17 @@ define <4 x i64> @vfptosi_v4i64_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl)
 ; ZVFH-LABEL: vfptosi_v4i64_v4f16_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFH-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; ZVFH-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_v4i64_v4f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i64> %v
@@ -197,9 +194,9 @@ define <4 x i8> @vfptosi_v4i8_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %e
 ; CHECK-LABEL: vfptosi_v4i8_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i8> %v
@@ -209,9 +206,9 @@ define <4 x i8> @vfptosi_v4i8_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl)
 ; CHECK-LABEL: vfptosi_v4i8_v4f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %v = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i8> %v
@@ -223,8 +220,7 @@ define <4 x i16> @vfptosi_v4i16_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vfptosi_v4i16_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i16> %v
@@ -234,8 +230,7 @@ define <4 x i16> @vfptosi_v4i16_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl
 ; CHECK-LABEL: vfptosi_v4i16_v4f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i16> %v
@@ -269,8 +264,8 @@ define <4 x i64> @vfptosi_v4i64_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vfptosi_v4i64_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i64> %v
@@ -280,8 +275,8 @@ define <4 x i64> @vfptosi_v4i64_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl
 ; CHECK-LABEL: vfptosi_v4i64_v4f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i64> %v
@@ -293,9 +288,9 @@ define <4 x i8> @vfptosi_v4i8_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: vfptosi_v4i8_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8, v0.t
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -307,9 +302,9 @@ define <4 x i8> @vfptosi_v4i8_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl)
 ; CHECK-LABEL: vfptosi_v4i8_v4f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -323,9 +318,9 @@ define <4 x i16> @vfptosi_v4i16_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vfptosi_v4i16_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8, v0.t
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i16> %v
@@ -335,9 +330,9 @@ define <4 x i16> @vfptosi_v4i16_v4f64_unmasked(<4 x double> %va, i32 zeroext %ev
 ; CHECK-LABEL: vfptosi_v4i16_v4f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i16> %v
@@ -349,8 +344,7 @@ define <4 x i32> @vfptosi_v4i32_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vfptosi_v4i32_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i32> %v
@@ -360,8 +354,7 @@ define <4 x i32> @vfptosi_v4i32_v4f64_unmasked(<4 x double> %va, i32 zeroext %ev
 ; CHECK-LABEL: vfptosi_v4i32_v4f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i32> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll
index c41f14076db31..b6a136faf0e1e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll
@@ -76,9 +76,8 @@ define <4 x i1> @vfptoui_v4i1_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: vfptoui_v4i1_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v10, v8, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll
index 5dd3e0372f401..d1aa97f068f39 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll
@@ -10,8 +10,7 @@ define <4 x i7> @vfptoui_v4i7_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %ev
 ; ZVFH-LABEL: vfptoui_v4i7_v4f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_v4i7_v4f16:
@@ -33,8 +32,7 @@ define <4 x i8> @vfptoui_v4i8_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %ev
 ; ZVFH-LABEL: vfptoui_v4i8_v4f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_v4i8_v4f16:
@@ -54,8 +52,7 @@ define <4 x i8> @vfptoui_v4i8_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; ZVFH-LABEL: vfptoui_v4i8_v4f16_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_v4i8_v4f16_unmasked:
@@ -155,17 +152,17 @@ define <4 x i64> @vfptoui_v4i64_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %
 ; ZVFH-LABEL: vfptoui_v4i64_v4f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
+; ZVFH-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFH-NEXT:    vfwcvt.rtz.xu.f.v v8, v10, v0.t
+; ZVFH-NEXT:    vfwcvt.rtz.xu.f.v v8, v9, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_v4i64_v4f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.rtz.xu.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.rtz.xu.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i64> %v
@@ -175,17 +172,17 @@ define <4 x i64> @vfptoui_v4i64_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl)
 ; ZVFH-LABEL: vfptoui_v4i64_v4f16_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFH-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; ZVFH-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_v4i64_v4f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i64> %v
@@ -197,9 +194,9 @@ define <4 x i8> @vfptoui_v4i8_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %e
 ; CHECK-LABEL: vfptoui_v4i8_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8, v0.t
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i8> %v
@@ -209,9 +206,9 @@ define <4 x i8> @vfptoui_v4i8_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl)
 ; CHECK-LABEL: vfptoui_v4i8_v4f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %v = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i8> %v
@@ -223,8 +220,7 @@ define <4 x i16> @vfptoui_v4i16_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vfptoui_v4i16_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i16> %v
@@ -234,8 +230,7 @@ define <4 x i16> @vfptoui_v4i16_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl
 ; CHECK-LABEL: vfptoui_v4i16_v4f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i16> %v
@@ -269,8 +264,8 @@ define <4 x i64> @vfptoui_v4i64_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vfptoui_v4i64_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i64> %v
@@ -280,8 +275,8 @@ define <4 x i64> @vfptoui_v4i64_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl
 ; CHECK-LABEL: vfptoui_v4i64_v4f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i64> %v
@@ -293,9 +288,9 @@ define <4 x i8> @vfptoui_v4i8_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: vfptoui_v4i8_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8, v0.t
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -307,9 +302,9 @@ define <4 x i8> @vfptoui_v4i8_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl)
 ; CHECK-LABEL: vfptoui_v4i8_v4f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -323,9 +318,9 @@ define <4 x i16> @vfptoui_v4i16_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vfptoui_v4i16_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8, v0.t
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i16> %v
@@ -335,9 +330,9 @@ define <4 x i16> @vfptoui_v4i16_v4f64_unmasked(<4 x double> %va, i32 zeroext %ev
 ; CHECK-LABEL: vfptoui_v4i16_v4f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i16> %v
@@ -349,8 +344,7 @@ define <4 x i32> @vfptoui_v4i32_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vfptoui_v4i32_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i32> %v
@@ -360,8 +354,7 @@ define <4 x i32> @vfptoui_v4i32_v4f64_unmasked(<4 x double> %va, i32 zeroext %ev
 ; CHECK-LABEL: vfptoui_v4i32_v4f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i32> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll
index 582706e4dfa18..ad93f3143c34c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll
@@ -11,8 +11,7 @@ define <2 x half> @vfptrunc_v2f16_v2f32(<2 x float> %a, <2 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vfptrunc_v2f16_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> %a, <2 x i1> %m, i32 %vl)
   ret <2 x half> %v
@@ -22,8 +21,7 @@ define <2 x half> @vfptrunc_v2f16_v2f32_unmasked(<2 x float> %a, i32 zeroext %vl
 ; CHECK-LABEL: vfptrunc_v2f16_v2f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> %a, <2 x i1> splat (i1 true), i32 %vl)
   ret <2 x half> %v
@@ -35,9 +33,9 @@ define <2 x half> @vfptrunc_v2f16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroex
 ; CHECK-LABEL: vfptrunc_v2f16_v2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8, v0.t
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> %a, <2 x i1> %m, i32 %vl)
   ret <2 x half> %v
@@ -47,9 +45,9 @@ define <2 x half> @vfptrunc_v2f16_v2f64_unmasked(<2 x double> %a, i32 zeroext %v
 ; CHECK-LABEL: vfptrunc_v2f16_v2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> %a, <2 x i1> splat (i1 true), i32 %vl)
   ret <2 x half> %v
@@ -61,8 +59,7 @@ define <2 x float> @vfptrunc_v2f32_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroe
 ; CHECK-LABEL: vfptrunc_v2f32_v2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double> %a, <2 x i1> %m, i32 %vl)
   ret <2 x float> %v
@@ -72,8 +69,7 @@ define <2 x float> @vfptrunc_v2f32_v2f64_unmasked(<2 x double> %a, i32 zeroext %
 ; CHECK-LABEL: vfptrunc_v2f32_v2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double> %a, <2 x i1> splat (i1 true), i32 %vl)
   ret <2 x float> %v
@@ -85,8 +81,7 @@ define <15 x float> @vfptrunc_v15f32_v15f64(<15 x double> %a, <15 x i1> %m, i32
 ; CHECK-LABEL: vfptrunc_v15f32_v15f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v16, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x float> @llvm.vp.fptrunc.v15f64.v15f32(<15 x double> %a, <15 x i1> %m, i32 %vl)
   ret <15 x float> %v
@@ -97,27 +92,26 @@ declare <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double>, <32 x i1>, i3
 define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vfptrunc_v32f32_v32f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    vslidedown.vi v12, v0, 2
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT:    vslidedown.vi v24, v0, 2
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    bltu a0, a2, .LBB7_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:  .LBB7_2:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v24, v0.t
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    addi a1, a0, -16
 ; CHECK-NEXT:    sltu a0, a0, a1
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v24, v16, v0.t
+; CHECK-NEXT:    vfncvt.f.f.w v16, v16, v0.t
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vslideup.vi v8, v24, 16
+; CHECK-NEXT:    vslideup.vi v8, v16, 16
 ; CHECK-NEXT:    ret
   %v = call <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double> %a, <32 x i1> %m, i32 %vl)
   ret <32 x float> %v
@@ -129,8 +123,7 @@ define <2 x bfloat> @vfptrunc_v2bf16_v2f32(<2 x float> %a, <2 x i1> %m, i32 zero
 ; CHECK-LABEL: vfptrunc_v2bf16_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float> %a, <2 x i1> %m, i32 %vl)
   ret <2 x bfloat> %v
@@ -140,8 +133,7 @@ define <2 x bfloat> @vfptrunc_v2bf16_v2f32_unmasked(<2 x float> %a, i32 zeroext
 ; CHECK-LABEL: vfptrunc_v2bf16_v2f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
   ret <2 x bfloat> %v
@@ -153,9 +145,9 @@ define <2 x bfloat> @vfptrunc_v2bf16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zer
 ; CHECK-LABEL: vfptrunc_v2bf16_v2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8, v0.t
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double> %a, <2 x i1> %m, i32 %vl)
   ret <2 x bfloat> %v
@@ -165,9 +157,9 @@ define <2 x bfloat> @vfptrunc_v2bf16_v2f64_unmasked(<2 x double> %a, i32 zeroext
 ; CHECK-LABEL: vfptrunc_v2bf16_v2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
   ret <2 x bfloat> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index b52b082282ec4..f5cc12bbdf878 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -315,8 +315,8 @@ define <8 x float> @si2fp_v8i1_v8f32(<8 x i1> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %z = sitofp <8 x i1> %x to <8 x float>
   ret <8 x float> %z
@@ -327,8 +327,8 @@ define <8 x float> @ui2fp_v8i1_v8f32(<8 x i1> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %z = uitofp <8 x i1> %x to <8 x float>
   ret <8 x float> %z
@@ -369,8 +369,8 @@ define void @si2fp_v8i16_v8f64(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    vse64.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
@@ -384,8 +384,8 @@ define void @ui2fp_v8i16_v8f64(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    vse64.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
@@ -399,8 +399,8 @@ define <8 x double> @si2fp_v8i1_v8f64(<8 x i1> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %z = sitofp <8 x i1> %x to <8 x double>
   ret <8 x double> %z
@@ -411,8 +411,8 @@ define <8 x double> @ui2fp_v8i1_v8f64(<8 x i1> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %z = uitofp <8 x i1> %x to <8 x double>
   ret <8 x double> %z
@@ -423,9 +423,9 @@ define void @si2fp_v2i64_v2bf16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x i64>, ptr %x
@@ -439,9 +439,9 @@ define void @ui2fp_v2i64_v2bf16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x i64>, ptr %x
@@ -481,9 +481,9 @@ define void @si2fp_v8i64_v8bf16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x i64>, ptr %x
@@ -497,9 +497,9 @@ define void @ui2fp_v8i64_v8bf16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x i64>, ptr %x
@@ -513,9 +513,9 @@ define <8 x bfloat> @si2fp_v8i1_v8bf16(<8 x i1> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v10, v8
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %z = sitofp <8 x i1> %x to <8 x bfloat>
   ret <8 x bfloat> %z
@@ -526,9 +526,9 @@ define <8 x bfloat> @ui2fp_v8i1_v8bf16(<8 x i1> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %z = uitofp <8 x i1> %x to <8 x bfloat>
   ret <8 x bfloat> %z
@@ -539,9 +539,9 @@ define void @si2fp_v2i64_v2f16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x i64>, ptr %x
@@ -555,9 +555,9 @@ define void @ui2fp_v2i64_v2f16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x i64>, ptr %x
@@ -613,9 +613,9 @@ define void @si2fp_v8i64_v8f16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x i64>, ptr %x
@@ -629,9 +629,9 @@ define void @ui2fp_v8i64_v8f16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x i64>, ptr %x
@@ -653,9 +653,9 @@ define <8 x half> @si2fp_v8i1_v8f16(<8 x i1> %x) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.i v8, 0
-; ZVFHMIN-NEXT:    vmerge.vim v8, v8, -1, v0
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vmerge.vim v9, v8, -1, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %z = sitofp <8 x i1> %x to <8 x half>
   ret <8 x half> %z
@@ -674,9 +674,9 @@ define <8 x half> @ui2fp_v8i1_v8f16(<8 x i1> %x) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.i v8, 0
-; ZVFHMIN-NEXT:    vmerge.vim v8, v8, 1, v0
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vmerge.vim v9, v8, 1, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %z = uitofp <8 x i1> %x to <8 x half>
   ret <8 x half> %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
index 970f4b04e919a..8dbf3c3fdc569 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
@@ -49,8 +49,8 @@ define void @sext_v32i8_v32i32(ptr %x, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    vle8.v v14, (a0)
+; CHECK-NEXT:    vsext.vf4 v8, v14
 ; CHECK-NEXT:    vse32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
@@ -80,9 +80,9 @@ define void @trunc_v8i8_v8i32(ptr %x, ptr %z) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
index 0b08d9401402b..bb945f4c07893 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
@@ -52,8 +52,8 @@ define void @setgt_vv_v64i8(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vle8.v v12, (a1)
-; CHECK-NEXT:    vmslt.vv v16, v12, v8
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmslt.vv v8, v12, v8
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = load <64 x i8>, ptr %y
@@ -69,8 +69,8 @@ define void @setlt_vv_v128i8(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vle8.v v16, (a1)
-; CHECK-NEXT:    vmslt.vv v24, v8, v16
-; CHECK-NEXT:    vsm.v v24, (a2)
+; CHECK-NEXT:    vmslt.vv v8, v8, v16
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = load <128 x i8>, ptr %y
@@ -118,8 +118,8 @@ define void @setugt_vv_v32i8(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vle8.v v10, (a1)
-; CHECK-NEXT:    vmsltu.vv v12, v10, v8
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmsltu.vv v8, v10, v8
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
@@ -135,8 +135,8 @@ define void @setult_vv_v64i8(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vle8.v v12, (a1)
-; CHECK-NEXT:    vmsltu.vv v16, v8, v12
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmsltu.vv v8, v8, v12
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = load <64 x i8>, ptr %y
@@ -152,8 +152,8 @@ define void @setuge_vv_v128i8(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vle8.v v16, (a1)
-; CHECK-NEXT:    vmsleu.vv v24, v16, v8
-; CHECK-NEXT:    vsm.v v24, (a2)
+; CHECK-NEXT:    vmsleu.vv v8, v16, v8
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = load <128 x i8>, ptr %y
@@ -200,8 +200,8 @@ define void @setne_vx_v32i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsne.vx v10, v8, a1
-; CHECK-NEXT:    vsm.v v10, (a2)
+; CHECK-NEXT:    vmsne.vx v8, v8, a1
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -217,8 +217,8 @@ define void @setgt_vx_v64i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 64
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgt.vx v12, v8, a1
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmsgt.vx v8, v8, a1
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -234,8 +234,8 @@ define void @setlt_vx_v128i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 128
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmslt.vx v16, v8, a1
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmslt.vx v8, v8, a1
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = insertelement <128 x i8> poison, i8 %y, i32 0
@@ -284,8 +284,8 @@ define void @setugt_vx_v32i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgtu.vx v10, v8, a1
-; CHECK-NEXT:    vsm.v v10, (a2)
+; CHECK-NEXT:    vmsgtu.vx v8, v8, a1
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -301,8 +301,8 @@ define void @setult_vx_v64i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 64
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsltu.vx v12, v8, a1
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmsltu.vx v8, v8, a1
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -319,8 +319,8 @@ define void @setuge_vx_v128i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    vmsleu.vv v24, v16, v8
-; CHECK-NEXT:    vsm.v v24, (a2)
+; CHECK-NEXT:    vmsleu.vv v8, v16, v8
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = insertelement <128 x i8> poison, i8 %y, i32 0
@@ -368,8 +368,8 @@ define void @setne_xv_v32i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsne.vx v10, v8, a1
-; CHECK-NEXT:    vsm.v v10, (a2)
+; CHECK-NEXT:    vmsne.vx v8, v8, a1
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -385,8 +385,8 @@ define void @setgt_xv_v64i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 64
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmslt.vx v12, v8, a1
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmslt.vx v8, v8, a1
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -402,8 +402,8 @@ define void @setlt_xv_v128i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 128
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgt.vx v16, v8, a1
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmsgt.vx v8, v8, a1
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = insertelement <128 x i8> poison, i8 %y, i32 0
@@ -452,8 +452,8 @@ define void @setugt_xv_v32i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsltu.vx v10, v8, a1
-; CHECK-NEXT:    vsm.v v10, (a2)
+; CHECK-NEXT:    vmsltu.vx v8, v8, a1
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -469,8 +469,8 @@ define void @setult_xv_v64i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 64
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgtu.vx v12, v8, a1
-; CHECK-NEXT:    vsm.v v12, (a2)
+; CHECK-NEXT:    vmsgtu.vx v8, v8, a1
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -486,8 +486,8 @@ define void @setuge_xv_v128i8(ptr %x, i8 %y, ptr %z) {
 ; CHECK-NEXT:    li a3, 128
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsleu.vx v16, v8, a1
-; CHECK-NEXT:    vsm.v v16, (a2)
+; CHECK-NEXT:    vmsleu.vx v8, v8, a1
+; CHECK-NEXT:    vsm.v v8, (a2)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %b = insertelement <128 x i8> poison, i8 %y, i32 0
@@ -534,8 +534,8 @@ define void @setne_vi_v32i8(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsne.vi v10, v8, 0
-; CHECK-NEXT:    vsm.v v10, (a1)
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %d = icmp ne <32 x i8> %a, splat (i8 0)
@@ -549,8 +549,8 @@ define void @setgt_vi_v64i8(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgt.vi v12, v8, 0
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmsgt.vi v8, v8, 0
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %d = icmp sgt <64 x i8> %a, splat (i8 0)
@@ -564,8 +564,8 @@ define void @setgt_vi_v64i8_nonzero(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgt.vi v12, v8, 5
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmsgt.vi v8, v8, 5
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %d = icmp sgt <64 x i8> %a, splat (i8 5)
@@ -579,8 +579,8 @@ define void @setlt_vi_v128i8(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 128
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsle.vi v16, v8, -1
-; CHECK-NEXT:    vsm.v v16, (a1)
+; CHECK-NEXT:    vmsle.vi v8, v8, -1
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %d = icmp slt <128 x i8> %a, splat (i8 0)
@@ -622,8 +622,8 @@ define void @setugt_vi_v32i8(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgtu.vi v10, v8, 5
-; CHECK-NEXT:    vsm.v v10, (a1)
+; CHECK-NEXT:    vmsgtu.vi v8, v8, 5
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %d = icmp ugt <32 x i8> %a, splat (i8 5)
@@ -637,8 +637,8 @@ define void @setult_vi_v64i8(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsleu.vi v12, v8, 4
-; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    vmsleu.vi v8, v8, 4
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %d = icmp ult <64 x i8> %a, splat (i8 5)
@@ -652,8 +652,8 @@ define void @setuge_vi_v128i8(ptr %x, ptr %z) {
 ; CHECK-NEXT:    li a2, 128
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgtu.vi v16, v8, 4
-; CHECK-NEXT:    vsm.v v16, (a1)
+; CHECK-NEXT:    vmsgtu.vi v8, v8, 4
+; CHECK-NEXT:    vsm.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <128 x i8>, ptr %x
   %d = icmp uge <128 x i8> %a, splat (i8 5)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 0c30cbe4a42ef..795c06e1ad20e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -3215,25 +3215,25 @@ define void @mulhu_v16i16(ptr %x) {
 ; RV32-NEXT:    addi a1, a1, 289
 ; RV32-NEXT:    vmv1r.v v0, v8
 ; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; RV32-NEXT:    vmerge.vim v16, v9, 1, v0
+; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
 ; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT:    vmv.s.x v0, a1
 ; RV32-NEXT:    lui a1, %hi(.LCPI182_0)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI182_0)
 ; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v9, 3
-; RV32-NEXT:    vmerge.vim v9, v9, 2, v0
+; RV32-NEXT:    vmv.v.i v14, 3
+; RV32-NEXT:    vmerge.vim v16, v14, 2, v0
 ; RV32-NEXT:    vle16.v v14, (a1)
 ; RV32-NEXT:    vmv1r.v v0, v8
-; RV32-NEXT:    vmerge.vim v17, v9, 1, v0
+; RV32-NEXT:    vmerge.vim v16, v16, 1, v0
 ; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT:    vsext.vf2 v8, v16
+; RV32-NEXT:    vsext.vf2 v8, v9
 ; RV32-NEXT:    vsrl.vv v8, v10, v8
 ; RV32-NEXT:    vmulhu.vv v8, v8, v14
 ; RV32-NEXT:    vsub.vv v10, v10, v8
 ; RV32-NEXT:    vmulhu.vv v10, v10, v12
 ; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsext.vf2 v10, v17
+; RV32-NEXT:    vsext.vf2 v10, v16
 ; RV32-NEXT:    vsrl.vv v8, v8, v10
 ; RV32-NEXT:    vse16.v v8, (a0)
 ; RV32-NEXT:    ret
@@ -3253,11 +3253,11 @@ define void @mulhu_v16i16(ptr %x) {
 ; RV64-NEXT:    li a1, 1
 ; RV64-NEXT:    slli a1, a1, 48
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    vmv.v.x v15, a1
 ; RV64-NEXT:    lui a1, %hi(.LCPI182_1)
 ; RV64-NEXT:    ld a1, %lo(.LCPI182_1)(a1)
 ; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT:    vsext.vf2 v14, v16
+; RV64-NEXT:    vsext.vf2 v14, v15
 ; RV64-NEXT:    vsrl.vv v14, v8, v14
 ; RV64-NEXT:    vmulhu.vv v12, v14, v12
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-inttoptr-ptrtoint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-inttoptr-ptrtoint.ll
index 8b6270e86af36..1890f7c60735c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-inttoptr-ptrtoint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-inttoptr-ptrtoint.ll
@@ -7,8 +7,8 @@ define <4 x ptr> @inttoptr_v4p0_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %e
 ; CHECK-LABEL: inttoptr_v4p0_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vzext.vf2 v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
   ret <4 x ptr> %v
@@ -30,8 +30,7 @@ define <4 x i32> @ptrtoint_v4i32_v4p0(<4 x ptr> %va, <4 x i1> %m, i32 zeroext %e
 ; CHECK-LABEL: ptrtoint_v4i32_v4p0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.ptrtoint.v4i32.v4p0(<4 x ptr> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i32> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
index 466fe744a1376..0380e92b3c5c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
@@ -46,15 +46,15 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x, <3 x i1> %m, i32 zeroext %e
 ; RV32-LABEL: llrint_v3i64_v3f32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vmv1r.v v10, v8
-; RV32-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
+; RV32-NEXT:    vmv1r.v v9, v8
+; RV32-NEXT:    vfwcvt.x.f.v v8, v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: llrint_v3i64_v3f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vmv1r.v v10, v8
-; RV64-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
+; RV64-NEXT:    vmv1r.v v9, v8
+; RV64-NEXT:    vfwcvt.x.f.v v8, v9, v0.t
 ; RV64-NEXT:    ret
   %a = call <3 x i64> @llvm.vp.llrint.v3i64.v3f32(<3 x float> %x, <3 x i1> %m, i32 %evl)
   ret <3 x i64> %a
@@ -65,15 +65,15 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x, <4 x i1> %m, i32 zeroext %e
 ; RV32-LABEL: llrint_v4i64_v4f32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vmv1r.v v10, v8
-; RV32-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
+; RV32-NEXT:    vmv1r.v v9, v8
+; RV32-NEXT:    vfwcvt.x.f.v v8, v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: llrint_v4i64_v4f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vmv1r.v v10, v8
-; RV64-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
+; RV64-NEXT:    vmv1r.v v9, v8
+; RV64-NEXT:    vfwcvt.x.f.v v8, v9, v0.t
 ; RV64-NEXT:    ret
   %a = call <4 x i64> @llvm.vp.llrint.v4i64.v4f32(<4 x float> %x, <4 x i1> %m, i32 %evl)
   ret <4 x i64> %a
@@ -84,15 +84,15 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %e
 ; RV32-LABEL: llrint_v8i64_v8f32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vfwcvt.x.f.v v8, v12, v0.t
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: llrint_v8i64_v8f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vmv2r.v v12, v8
-; RV64-NEXT:    vfwcvt.x.f.v v8, v12, v0.t
+; RV64-NEXT:    vmv2r.v v10, v8
+; RV64-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
 ; RV64-NEXT:    ret
   %a = call <8 x i64> @llvm.vp.llrint.v8i64.v8f32(<8 x float> %x, <8 x i1> %m, i32 %evl)
   ret <8 x i64> %a
@@ -103,15 +103,15 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroe
 ; RV32-LABEL: llrint_v16i64_v16f32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vmv4r.v v16, v8
-; RV32-NEXT:    vfwcvt.x.f.v v8, v16, v0.t
+; RV32-NEXT:    vmv4r.v v12, v8
+; RV32-NEXT:    vfwcvt.x.f.v v8, v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: llrint_v16i64_v16f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vmv4r.v v16, v8
-; RV64-NEXT:    vfwcvt.x.f.v v8, v16, v0.t
+; RV64-NEXT:    vmv4r.v v12, v8
+; RV64-NEXT:    vfwcvt.x.f.v v8, v12, v0.t
 ; RV64-NEXT:    ret
   %a = call <16 x i64> @llvm.vp.llrint.v16i64.v16f32(<16 x float> %x, <16 x i1> %m, i32 %evl)
   ret <16 x i64> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
index 5b5163c17a5c9..4f15ed4bccd9c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
@@ -70,8 +70,8 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x, <3 x i1> %m, i32 zeroext %evl) {
 ; RV64-i64-LABEL: lrint_v3f32:
 ; RV64-i64:       # %bb.0:
 ; RV64-i64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-i64-NEXT:    vmv1r.v v10, v8
-; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
+; RV64-i64-NEXT:    vmv1r.v v9, v8
+; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v9, v0.t
 ; RV64-i64-NEXT:    ret
   %a = call <3 x iXLen> @llvm.vp.lrint.v3iXLen.v3f32(<3 x float> %x, <3 x i1> %m, i32 %evl)
   ret <3 x iXLen> %a
@@ -94,8 +94,8 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x, <4 x i1> %m, i32 zeroext %evl) {
 ; RV64-i64-LABEL: lrint_v4f32:
 ; RV64-i64:       # %bb.0:
 ; RV64-i64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-i64-NEXT:    vmv1r.v v10, v8
-; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
+; RV64-i64-NEXT:    vmv1r.v v9, v8
+; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v9, v0.t
 ; RV64-i64-NEXT:    ret
   %a = call <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f32(<4 x float> %x, <4 x i1> %m, i32 %evl)
   ret <4 x iXLen> %a
@@ -118,8 +118,8 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %evl) {
 ; RV64-i64-LABEL: lrint_v8f32:
 ; RV64-i64:       # %bb.0:
 ; RV64-i64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-i64-NEXT:    vmv2r.v v12, v8
-; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v12, v0.t
+; RV64-i64-NEXT:    vmv2r.v v10, v8
+; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
 ; RV64-i64-NEXT:    ret
   %a = call <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f32(<8 x float> %x, <8 x i1> %m, i32 %evl)
   ret <8 x iXLen> %a
@@ -142,8 +142,8 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %ev
 ; RV64-i64-LABEL: lrint_v16f32:
 ; RV64-i64:       # %bb.0:
 ; RV64-i64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-i64-NEXT:    vmv4r.v v16, v8
-; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v16, v0.t
+; RV64-i64-NEXT:    vmv4r.v v12, v8
+; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v12, v0.t
 ; RV64-i64-NEXT:    ret
   %a = call <16 x iXLen> @llvm.vp.lrint.v16iXLen.v16f32(<16 x float> %x, <16 x i1> %m, i32 %evl)
   ret <16 x iXLen> %a
@@ -154,15 +154,13 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x, <1 x i1> %m, i32 zeroext %evl)
 ; RV32-LABEL: lrint_v1f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-i32-LABEL: lrint_v1f64:
 ; RV64-i32:       # %bb.0:
 ; RV64-i32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-i32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
-; RV64-i32-NEXT:    vmv1r.v v8, v9
+; RV64-i32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_v1f64:
@@ -179,15 +177,13 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x, <2 x i1> %m, i32 zeroext %evl)
 ; RV32-LABEL: lrint_v2f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-i32-LABEL: lrint_v2f64:
 ; RV64-i32:       # %bb.0:
 ; RV64-i32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-i32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
-; RV64-i32-NEXT:    vmv1r.v v8, v9
+; RV64-i32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_v2f64:
@@ -204,15 +200,13 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl)
 ; RV32-LABEL: lrint_v4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v10, v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-i32-LABEL: lrint_v4f64:
 ; RV64-i32:       # %bb.0:
 ; RV64-i32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-i32-NEXT:    vfncvt.x.f.w v10, v8, v0.t
-; RV64-i32-NEXT:    vmv.v.v v8, v10
+; RV64-i32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_v4f64:
@@ -229,15 +223,13 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl)
 ; RV32-LABEL: lrint_v8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v12, v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-i32-LABEL: lrint_v8f64:
 ; RV64-i32:       # %bb.0:
 ; RV64-i32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-i32-NEXT:    vfncvt.x.f.w v12, v8, v0.t
-; RV64-i32-NEXT:    vmv.v.v v8, v12
+; RV64-i32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_v8f64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
index 979785dd2c024..412769301a4e3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -167,8 +167,8 @@ define void @splat_v32i1(ptr %x, i1 %y) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vmsne.vi v10, v8, 0
-; CHECK-NEXT:    vsm.v v10, (a0)
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
+; CHECK-NEXT:    vsm.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = insertelement <32 x i1> poison, i1 %y, i32 0
   %b = shufflevector <32 x i1> %a, <32 x i1> poison, <32 x i32> zeroinitializer
@@ -201,8 +201,8 @@ define void @splat_v64i1(ptr %x, i1 %y) {
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vmsne.vi v12, v8, 0
-; CHECK-NEXT:    vsm.v v12, (a0)
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
+; CHECK-NEXT:    vsm.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = insertelement <64 x i1> poison, i1 %y, i32 0
   %b = shufflevector <64 x i1> %a, <64 x i1> poison, <64 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 533b8b6864ebc..4a2ff31989b01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -522,15 +522,13 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
 ; RV32-LABEL: mgather_truemask_v4i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_truemask_v4i8:
 ; RV64V:       # %bb.0:
 ; RV64V-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; RV64V-NEXT:    vluxei64.v v10, (zero), v8
-; RV64V-NEXT:    vmv1r.v v8, v10
+; RV64V-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64V-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_truemask_v4i8:
@@ -1211,15 +1209,13 @@ define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
 ; RV32-LABEL: mgather_truemask_v4i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_truemask_v4i16:
 ; RV64V:       # %bb.0:
 ; RV64V-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64V-NEXT:    vluxei64.v v10, (zero), v8
-; RV64V-NEXT:    vmv1r.v v8, v10
+; RV64V-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64V-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_truemask_v4i16:
@@ -2235,8 +2231,7 @@ define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) {
 ; RV64V-LABEL: mgather_truemask_v4i32:
 ; RV64V:       # %bb.0:
 ; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-NEXT:    vluxei64.v v10, (zero), v8
-; RV64V-NEXT:    vmv.v.v v8, v10
+; RV64V-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64V-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_truemask_v4i32:
@@ -3645,8 +3640,8 @@ define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
 ; RV32V-LABEL: mgather_truemask_v4i64:
 ; RV32V:       # %bb.0:
 ; RV32V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32V-NEXT:    vmv1r.v v10, v8
-; RV32V-NEXT:    vluxei32.v v8, (zero), v10
+; RV32V-NEXT:    vmv1r.v v9, v8
+; RV32V-NEXT:    vluxei32.v v8, (zero), v9
 ; RV32V-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_truemask_v4i64:
@@ -6552,8 +6547,8 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
 ; RV32V-LABEL: mgather_baseidx_v8i64:
 ; RV32V:       # %bb.0:
 ; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-NEXT:    vnsrl.wi v16, v8, 0
-; RV32V-NEXT:    vsll.vi v8, v16, 3
+; RV32V-NEXT:    vnsrl.wi v8, v8, 0
+; RV32V-NEXT:    vsll.vi v8, v8, 3
 ; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
 ; RV32V-NEXT:    vmv.v.v v8, v12
@@ -6992,15 +6987,13 @@ define <4 x bfloat> @mgather_truemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %pass
 ; RV32-LABEL: mgather_truemask_v4bf16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_truemask_v4bf16:
 ; RV64V:       # %bb.0:
 ; RV64V-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64V-NEXT:    vluxei64.v v10, (zero), v8
-; RV64V-NEXT:    vmv1r.v v8, v10
+; RV64V-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64V-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_truemask_v4bf16:
@@ -7956,15 +7949,13 @@ define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru)
 ; RV32-LABEL: mgather_truemask_v4f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_truemask_v4f16:
 ; RV64V:       # %bb.0:
 ; RV64V-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64V-NEXT:    vluxei64.v v10, (zero), v8
-; RV64V-NEXT:    vmv1r.v v8, v10
+; RV64V-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64V-NEXT:    ret
 ;
 ; RV64ZVE32F-ZVFH-LABEL: mgather_truemask_v4f16:
@@ -9427,8 +9418,7 @@ define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthr
 ; RV64V-LABEL: mgather_truemask_v4f32:
 ; RV64V:       # %bb.0:
 ; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-NEXT:    vluxei64.v v10, (zero), v8
-; RV64V-NEXT:    vmv.v.v v8, v10
+; RV64V-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64V-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_truemask_v4f32:
@@ -10803,8 +10793,8 @@ define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passt
 ; RV32V-LABEL: mgather_truemask_v4f64:
 ; RV32V:       # %bb.0:
 ; RV32V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32V-NEXT:    vmv1r.v v10, v8
-; RV32V-NEXT:    vluxei32.v v8, (zero), v10
+; RV32V-NEXT:    vmv1r.v v9, v8
+; RV32V-NEXT:    vluxei32.v v8, (zero), v9
 ; RV32V-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_truemask_v4f64:
@@ -13026,8 +13016,8 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
 ; RV32V-LABEL: mgather_baseidx_v8f64:
 ; RV32V:       # %bb.0:
 ; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-NEXT:    vnsrl.wi v16, v8, 0
-; RV32V-NEXT:    vsll.vi v8, v16, 3
+; RV32V-NEXT:    vnsrl.wi v8, v8, 0
+; RV32V-NEXT:    vsll.vi v8, v8, 3
 ; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
 ; RV32V-NEXT:    vmv.v.v v8, v12
@@ -14008,9 +13998,9 @@ define <4 x i32> @mgather_narrow_edge_case(ptr %base) {
 ; RV64V-NEXT:    vmv.v.i v0, 5
 ; RV64V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64V-NEXT:    vmv.v.x v8, a1
-; RV64V-NEXT:    vmerge.vim v10, v8, 0, v0
+; RV64V-NEXT:    vmerge.vim v8, v8, 0, v0
 ; RV64V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64V-NEXT:    vluxei64.v v8, (a0), v10
+; RV64V-NEXT:    vluxei64.v v8, (a0), v8
 ; RV64V-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_narrow_edge_case:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index e86fae6d501e5..b161afe47687a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -5563,8 +5563,8 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
 ; RV32V-LABEL: mscatter_baseidx_v8i64:
 ; RV32V:       # %bb.0:
 ; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-NEXT:    vnsrl.wi v16, v12, 0
-; RV32V-NEXT:    vsll.vi v12, v16, 3
+; RV32V-NEXT:    vnsrl.wi v12, v12, 0
+; RV32V-NEXT:    vsll.vi v12, v12, 3
 ; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; RV32V-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32V-NEXT:    ret
@@ -11720,8 +11720,8 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
 ; RV32V-LABEL: mscatter_baseidx_v8f64:
 ; RV32V:       # %bb.0:
 ; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-NEXT:    vnsrl.wi v16, v12, 0
-; RV32V-NEXT:    vsll.vi v12, v16, 3
+; RV32V-NEXT:    vnsrl.wi v12, v12, 0
+; RV32V-NEXT:    vsll.vi v12, v12, 3
 ; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; RV32V-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32V-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
index b6c441290ee45..7a1051bf4cc3f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
@@ -135,20 +135,18 @@ declare <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v16f16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
+; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
@@ -264,19 +262,17 @@ define <8 x float> @vp_nearbyint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vp_nearbyint_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
@@ -308,19 +304,17 @@ define <16 x float> @vp_nearbyint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer
 ; CHECK-LABEL: vp_nearbyint_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
@@ -393,20 +387,18 @@ declare <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
@@ -437,20 +429,18 @@ declare <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
@@ -481,20 +471,18 @@ declare <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v15f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
@@ -525,20 +513,18 @@ declare <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v16f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
@@ -569,16 +555,14 @@ declare <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double>, <32 x i1>, i32)
 define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v32f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv1r.v v6, v0
 ; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v7, v0, 2
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    bltu a0, a2, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v8, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
@@ -588,9 +572,8 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    frflags a1
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
index 35cd789acfcc8..0e18143313345 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
@@ -123,19 +123,17 @@ declare <16 x half> @llvm.vp.rint.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v16f16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
+; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
-; CHECK-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -240,18 +238,16 @@ define <8 x float> @vp_rint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl
 ; CHECK-LABEL: vp_rint_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -280,18 +276,16 @@ define <16 x float> @vp_rint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vp_rint_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -357,19 +351,17 @@ declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -397,19 +389,17 @@ declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -437,19 +427,17 @@ declare <15 x double> @llvm.vp.rint.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v15f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -477,19 +465,17 @@ declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v16f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -517,16 +503,14 @@ declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32)
 define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v32f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv1r.v v6, v0
 ; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v7, v0, 2
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    bltu a0, a2, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v8, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
@@ -535,9 +519,8 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
 ; CHECK-NEXT:    sltu a0, a0, a1
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
index d8ff7062f033e..28723ee87f339 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
@@ -271,21 +271,19 @@ declare <16 x half> @llvm.vp.round.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_v16f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
-; ZVFH-NEXT:    vmv1r.v v0, v10
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_round_v16f16:
@@ -444,20 +442,18 @@ define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev
 ; CHECK-LABEL: vp_round_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.round.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -488,20 +484,18 @@ define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vp_round_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.round.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -573,21 +567,19 @@ declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -617,21 +609,19 @@ declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -661,21 +651,19 @@ declare <15 x double> @llvm.vp.round.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v15f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -705,21 +693,19 @@ declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v16f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -749,16 +735,14 @@ declare <32 x double> @llvm.vp.round.v32f64(<32 x double>, <32 x i1>, i32)
 define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v32f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv1r.v v6, v0
 ; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v7, v0, 2
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    bltu a0, a2, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v8, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
@@ -768,9 +752,8 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a1, 4
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
 ; CHECK-NEXT:    fsrm a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
index 2649f234375d2..4c57491a481b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
@@ -271,21 +271,19 @@ declare <16 x half> @llvm.vp.roundeven.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_v16f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
-; ZVFH-NEXT:    vmv1r.v v0, v10
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_v16f16:
@@ -444,20 +442,18 @@ define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vp_roundeven_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -488,20 +484,18 @@ define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer
 ; CHECK-LABEL: vp_roundeven_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -573,21 +567,19 @@ declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -617,21 +609,19 @@ declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -661,21 +651,19 @@ declare <15 x double> @llvm.vp.roundeven.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v15f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -705,21 +693,19 @@ declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v16f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -749,16 +735,14 @@ declare <32 x double> @llvm.vp.roundeven.v32f64(<32 x double>, <32 x i1>, i32)
 define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v32f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv1r.v v6, v0
 ; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v7, v0, 2
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    bltu a0, a2, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v8, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
@@ -768,9 +752,8 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a1, 0
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
 ; CHECK-NEXT:    fsrm a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
index 50e65b62e7848..ab83752dfa262 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
@@ -271,21 +271,19 @@ declare <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_v16f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI6_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
-; ZVFH-NEXT:    vmv1r.v v0, v10
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_v16f16:
@@ -444,20 +442,18 @@ define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: vp_roundtozero_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -488,20 +484,18 @@ define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 z
 ; CHECK-LABEL: vp_roundtozero_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
   ret <16 x float> %v
@@ -573,21 +567,19 @@ declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI18_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -617,21 +609,19 @@ declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI20_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
@@ -661,21 +651,19 @@ declare <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v15f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI22_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
   ret <15 x double> %v
@@ -705,21 +693,19 @@ declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v16f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI24_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
   ret <16 x double> %v
@@ -749,16 +735,14 @@ declare <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double>, <32 x i1>, i32)
 define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v32f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv1r.v v6, v0
 ; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v7, v0, 2
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    bltu a0, a2, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:  .LBB26_2:
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v8, v0.t
 ; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
@@ -768,9 +752,8 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a1, 1
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
 ; CHECK-NEXT:    fsrm a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll
index c0a213034c95b..aefae158f0771 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll
@@ -76,9 +76,9 @@ define signext i32 @sad_16x8_as_i32(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vminu.vv v10, v8, v9
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
-; CHECK-NEXT:    vsub.vv v12, v8, v10
+; CHECK-NEXT:    vsub.vv v11, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    vmv.s.x v12, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v12
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -114,21 +114,21 @@ define signext i32 @sad_2block_16xi8_as_i32(ptr %a, ptr %b, i32 signext %stridea
 ; CHECK-NEXT:    vsub.vv v8, v8, v14
 ; CHECK-NEXT:    vminu.vv v14, v10, v11
 ; CHECK-NEXT:    vmaxu.vv v10, v10, v11
-; CHECK-NEXT:    vle8.v v11, (a1)
-; CHECK-NEXT:    vsub.vv v10, v10, v14
+; CHECK-NEXT:    vle8.v v15, (a1)
+; CHECK-NEXT:    vsub.vv v11, v10, v14
 ; CHECK-NEXT:    vminu.vv v14, v12, v13
-; CHECK-NEXT:    vmaxu.vv v15, v12, v13
-; CHECK-NEXT:    vwaddu.vv v12, v10, v8
-; CHECK-NEXT:    vsub.vv v8, v15, v14
-; CHECK-NEXT:    vminu.vv v10, v9, v11
-; CHECK-NEXT:    vmaxu.vv v9, v9, v11
+; CHECK-NEXT:    vmaxu.vv v12, v12, v13
+; CHECK-NEXT:    vwaddu.vv v10, v11, v8
+; CHECK-NEXT:    vsub.vv v8, v12, v14
+; CHECK-NEXT:    vminu.vv v14, v9, v15
+; CHECK-NEXT:    vmaxu.vv v9, v9, v15
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v14, v8
+; CHECK-NEXT:    vzext.vf2 v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vsub.vv v16, v9, v10
+; CHECK-NEXT:    vsub.vv v14, v9, v14
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vwaddu.vv v8, v14, v12
-; CHECK-NEXT:    vzext.vf2 v12, v16
+; CHECK-NEXT:    vwaddu.vv v8, v12, v10
+; CHECK-NEXT:    vzext.vf2 v12, v14
 ; CHECK-NEXT:    vwaddu.wv v8, v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.s.x v12, zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
index ba64655947602..ab41b0dfdb80d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
@@ -26,8 +26,7 @@ define <7 x i1> @fcmp_oeq_vv_v7f16(<7 x half> %va, <7 x half> %vb, <7 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <7 x i1> @llvm.vp.fcmp.v7f16(<7 x half> %va, <7 x half> %vb, metadata !"oeq", <7 x i1> %m, i32 %evl)
   ret <7 x i1> %v
@@ -48,8 +47,7 @@ define <8 x i1> @fcmp_oeq_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"oeq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -68,10 +66,9 @@ define <8 x i1> @fcmp_oeq_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -92,10 +89,9 @@ define <8 x i1> @fcmp_oeq_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -114,10 +110,9 @@ define <8 x i1> @fcmp_ogt_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ogt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -136,10 +131,9 @@ define <8 x i1> @fcmp_ogt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -160,10 +154,9 @@ define <8 x i1> @fcmp_ogt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -182,10 +175,9 @@ define <8 x i1> @fcmp_oge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"oge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -204,10 +196,9 @@ define <8 x i1> @fcmp_oge_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -228,10 +219,9 @@ define <8 x i1> @fcmp_oge_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -252,8 +242,7 @@ define <8 x i1> @fcmp_olt_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"olt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -272,10 +261,9 @@ define <8 x i1> @fcmp_olt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -296,10 +284,9 @@ define <8 x i1> @fcmp_olt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -320,8 +307,7 @@ define <8 x i1> @fcmp_ole_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ole", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -340,10 +326,9 @@ define <8 x i1> @fcmp_ole_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -364,10 +349,9 @@ define <8 x i1> @fcmp_ole_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -412,12 +396,12 @@ define <8 x i1> @fcmp_one_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v13, v12
+; ZVFHMIN-NEXT:    vmflt.vv v8, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -439,12 +423,12 @@ define <8 x i1> @fcmp_one_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v13, v10, v8, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v13, v12
+; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -493,14 +477,14 @@ define <8 x i1> @fcmp_ord_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v13, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v10, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v10, v8, v8, v0.t
-; ZVFHMIN-NEXT:    vmand.mm v0, v13, v10
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmand.mm v0, v10, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -524,14 +508,14 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v13, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v10, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v10, v8, v8, v0.t
-; ZVFHMIN-NEXT:    vmand.mm v0, v10, v13
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmand.mm v0, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -576,12 +560,12 @@ define <8 x i1> @fcmp_ueq_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vmnor.mm v0, v13, v12
+; ZVFHMIN-NEXT:    vmflt.vv v8, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmnor.mm v0, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -603,12 +587,12 @@ define <8 x i1> @fcmp_ueq_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v13, v10, v8, v0.t
-; ZVFHMIN-NEXT:    vmnor.mm v0, v13, v12
+; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmnor.mm v0, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -649,12 +633,12 @@ define <8 x i1> @fcmp_ugt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v12
+; ZVFHMIN-NEXT:    vmfle.vv v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -674,12 +658,12 @@ define <8 x i1> @fcmp_ugt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v12
+; ZVFHMIN-NEXT:    vmfle.vv v8, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -720,12 +704,12 @@ define <8 x i1> @fcmp_uge_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v12
+; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -745,12 +729,12 @@ define <8 x i1> @fcmp_uge_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v12
+; ZVFHMIN-NEXT:    vmflt.vv v8, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -770,9 +754,9 @@ define <8 x i1> @fcmp_ult_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmfle.vv v8, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ult", <8 x i1> %m, i32 %evl)
@@ -791,12 +775,12 @@ define <8 x i1> @fcmp_ult_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v12
+; ZVFHMIN-NEXT:    vmfle.vv v8, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -816,12 +800,12 @@ define <8 x i1> @fcmp_ult_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v12
+; ZVFHMIN-NEXT:    vmfle.vv v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -841,9 +825,9 @@ define <8 x i1> @fcmp_ule_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v8, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ule", <8 x i1> %m, i32 %evl)
@@ -862,12 +846,12 @@ define <8 x i1> @fcmp_ule_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v12
+; ZVFHMIN-NEXT:    vmflt.vv v8, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -887,12 +871,12 @@ define <8 x i1> @fcmp_ule_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; ZVFHMIN-NEXT:    vmnot.m v0, v12
+; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -913,8 +897,7 @@ define <8 x i1> @fcmp_une_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"une", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -933,10 +916,9 @@ define <8 x i1> @fcmp_une_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v10, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vv v0, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -957,10 +939,9 @@ define <8 x i1> @fcmp_une_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vv v0, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1009,14 +990,14 @@ define <8 x i1> @fcmp_uno_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v13, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmfne.vv v10, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v10, v8, v8, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v13, v10
+; ZVFHMIN-NEXT:    vmfne.vv v8, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v10, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1040,14 +1021,14 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v13, v10, v10, v0.t
+; ZVFHMIN-NEXT:    vmfne.vv v10, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v10, v8, v8, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v10, v13
+; ZVFHMIN-NEXT:    vmfne.vv v8, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmor.mm v0, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -1081,7 +1062,7 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
 ; ZVFH-NEXT:    li a0, 64
 ; ZVFH-NEXT:  .LBB43_2:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v6, v8, v24, v0.t
+; ZVFH-NEXT:    vmfeq.vv v8, v8, v24, v0.t
 ; ZVFH-NEXT:    addi a0, a2, -64
 ; ZVFH-NEXT:    sltu a1, a2, a0
 ; ZVFH-NEXT:    addi a1, a1, -1
@@ -1090,10 +1071,10 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
 ; ZVFH-NEXT:    addi a1, sp, 16
 ; ZVFH-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v8, v24, v16, v0.t
+; ZVFH-NEXT:    vmfeq.vv v9, v24, v16, v0.t
 ; ZVFH-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; ZVFH-NEXT:    vslideup.vi v6, v8, 8
-; ZVFH-NEXT:    vmv.v.v v0, v6
+; ZVFH-NEXT:    vslideup.vi v8, v9, 8
+; ZVFH-NEXT:    vmv.v.v v0, v8
 ; ZVFH-NEXT:    csrr a0, vlenb
 ; ZVFH-NEXT:    slli a0, a0, 3
 ; ZVFH-NEXT:    add sp, sp, a0
@@ -3367,8 +3348,7 @@ define <7 x i1> @fcmp_oeq_vv_v7f64(<7 x double> %va, <7 x double> %vb, <7 x i1>
 ; CHECK-LABEL: fcmp_oeq_vv_v7f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <7 x i1> @llvm.vp.fcmp.v7f64(<7 x double> %va, <7 x double> %vb, metadata !"oeq", <7 x i1> %m, i32 %evl)
   ret <7 x i1> %v
@@ -3380,8 +3360,7 @@ define <8 x i1> @fcmp_oeq_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_oeq_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oeq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3391,8 +3370,7 @@ define <8 x i1> @fcmp_oeq_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_oeq_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3404,8 +3382,7 @@ define <8 x i1> @fcmp_oeq_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_oeq_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3417,8 +3394,7 @@ define <8 x i1> @fcmp_ogt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ogt_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ogt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3428,8 +3404,7 @@ define <8 x i1> @fcmp_ogt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_ogt_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3441,8 +3416,7 @@ define <8 x i1> @fcmp_ogt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_ogt_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3454,8 +3428,7 @@ define <8 x i1> @fcmp_oge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_oge_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3465,8 +3438,7 @@ define <8 x i1> @fcmp_oge_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_oge_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3478,8 +3450,7 @@ define <8 x i1> @fcmp_oge_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_oge_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3491,8 +3462,7 @@ define <8 x i1> @fcmp_olt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_olt_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"olt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3502,8 +3472,7 @@ define <8 x i1> @fcmp_olt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_olt_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3515,8 +3484,7 @@ define <8 x i1> @fcmp_olt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_olt_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3528,8 +3496,7 @@ define <8 x i1> @fcmp_ole_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ole_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ole", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3539,8 +3506,7 @@ define <8 x i1> @fcmp_ole_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_ole_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3552,8 +3518,7 @@ define <8 x i1> @fcmp_ole_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_ole_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3566,8 +3531,8 @@ define <8 x i1> @fcmp_one_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v17, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"one", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3578,8 +3543,8 @@ define <8 x i1> @fcmp_one_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3592,8 +3557,8 @@ define <8 x i1> @fcmp_one_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3605,9 +3570,9 @@ define <8 x i1> @fcmp_ord_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ord_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12, v0.t
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8, v0.t
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v12, v12, v12, v0.t
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3619,9 +3584,9 @@ define <8 x i1> @fcmp_ord_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0, v0.t
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8, v0.t
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0, v0.t
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3635,9 +3600,9 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0, v0.t
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8, v0.t
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0, v0.t
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3650,8 +3615,8 @@ define <8 x i1> @fcmp_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v17, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3662,8 +3627,8 @@ define <8 x i1> @fcmp_ueq_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3676,8 +3641,8 @@ define <8 x i1> @fcmp_ueq_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3689,8 +3654,8 @@ define <8 x i1> @fcmp_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ugt_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3700,8 +3665,8 @@ define <8 x i1> @fcmp_ugt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_ugt_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3713,8 +3678,8 @@ define <8 x i1> @fcmp_ugt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_ugt_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3726,8 +3691,8 @@ define <8 x i1> @fcmp_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_uge_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3737,8 +3702,8 @@ define <8 x i1> @fcmp_uge_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_uge_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3750,8 +3715,8 @@ define <8 x i1> @fcmp_uge_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_uge_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3763,8 +3728,8 @@ define <8 x i1> @fcmp_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ult_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3774,8 +3739,8 @@ define <8 x i1> @fcmp_ult_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_ult_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3787,8 +3752,8 @@ define <8 x i1> @fcmp_ult_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_ult_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3800,8 +3765,8 @@ define <8 x i1> @fcmp_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_ule_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3811,8 +3776,8 @@ define <8 x i1> @fcmp_ule_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_ule_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3824,8 +3789,8 @@ define <8 x i1> @fcmp_ule_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_ule_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3837,8 +3802,7 @@ define <8 x i1> @fcmp_une_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_une_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfne.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"une", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3848,8 +3812,7 @@ define <8 x i1> @fcmp_une_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-LABEL: fcmp_une_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3861,8 +3824,7 @@ define <8 x i1> @fcmp_une_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-LABEL: fcmp_une_vf_swap_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3874,9 +3836,9 @@ define <8 x i1> @fcmp_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1>
 ; CHECK-LABEL: fcmp_uno_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12, v0.t
-; CHECK-NEXT:    vmfne.vv v12, v8, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v12, v12, v12, v0.t
+; CHECK-NEXT:    vmfne.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -3888,9 +3850,9 @@ define <8 x i1> @fcmp_uno_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0, v0.t
-; CHECK-NEXT:    vmfne.vv v12, v8, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0, v0.t
+; CHECK-NEXT:    vmfne.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3904,9 +3866,9 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0, v0.t
-; CHECK-NEXT:    vmfne.vv v12, v8, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0, v0.t
+; CHECK-NEXT:    vmfne.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> poison, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -3940,7 +3902,7 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x
 ; CHECK-NEXT:    li a0, 16
 ; CHECK-NEXT:  .LBB87_2:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v6, v8, v24, v0.t
+; CHECK-NEXT:    vmfeq.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    addi a0, a2, -16
 ; CHECK-NEXT:    sltu a1, a2, a0
 ; CHECK-NEXT:    addi a1, a1, -1
@@ -3949,10 +3911,10 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x
 ; CHECK-NEXT:    addi a1, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v24, v16, v0.t
+; CHECK-NEXT:    vmfeq.vv v9, v24, v16, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vslideup.vi v6, v8, 2
-; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
index ad57a6037652f..b83f367bd2c6e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
@@ -613,7 +613,7 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1>
 ; CHECK-NEXT:    addi a4, a4, -1
 ; CHECK-NEXT:    and a2, a4, a2
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v6, v16, v24, v0.t
+; CHECK-NEXT:    vmseq.vv v16, v16, v24, v0.t
 ; CHECK-NEXT:    bltu a3, a1, .LBB51_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 128
@@ -622,9 +622,8 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1>
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v16, v24, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmv1r.v v8, v6
+; CHECK-NEXT:    vmseq.vv v0, v24, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v16
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
@@ -649,16 +648,15 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 z
 ; CHECK-NEXT:    addi a4, a4, -1
 ; CHECK-NEXT:    and a1, a4, a1
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    bltu a2, a3, .LBB52_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a2, 128
 ; CHECK-NEXT:  .LBB52_2:
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -679,16 +677,15 @@ define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m,
 ; CHECK-NEXT:    addi a4, a4, -1
 ; CHECK-NEXT:    and a1, a4, a1
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    bltu a2, a3, .LBB53_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a2, 128
 ; CHECK-NEXT:  .LBB53_2:
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -702,8 +699,7 @@ define <8 x i1> @icmp_eq_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_eq_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmseq.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"eq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -713,8 +709,7 @@ define <8 x i1> @icmp_eq_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroex
 ; CHECK-LABEL: icmp_eq_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmseq.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -726,8 +721,7 @@ define <8 x i1> @icmp_eq_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 z
 ; CHECK-LABEL: icmp_eq_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmseq.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -739,8 +733,7 @@ define <8 x i1> @icmp_eq_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_eq_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmseq.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"eq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -750,8 +743,7 @@ define <8 x i1> @icmp_eq_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: icmp_eq_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmseq.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"eq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -761,8 +753,7 @@ define <8 x i1> @icmp_ne_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_ne_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsne.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ne", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -772,8 +763,7 @@ define <8 x i1> @icmp_ne_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroex
 ; CHECK-LABEL: icmp_ne_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsne.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -785,8 +775,7 @@ define <8 x i1> @icmp_ne_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 z
 ; CHECK-LABEL: icmp_ne_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsne.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -798,8 +787,7 @@ define <8 x i1> @icmp_ne_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_ne_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsne.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ne", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -809,8 +797,7 @@ define <8 x i1> @icmp_ne_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: icmp_ne_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsne.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ne", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -820,8 +807,7 @@ define <8 x i1> @icmp_ugt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_ugt_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsltu.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -831,8 +817,7 @@ define <8 x i1> @icmp_ugt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_ugt_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgtu.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -844,8 +829,7 @@ define <8 x i1> @icmp_ugt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_ugt_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -857,8 +841,7 @@ define <8 x i1> @icmp_ugt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_ugt_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -868,8 +851,7 @@ define <8 x i1> @icmp_ugt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_ugt_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -879,8 +861,7 @@ define <8 x i1> @icmp_uge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_uge_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -890,10 +871,9 @@ define <8 x i1> @icmp_uge_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_uge_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vv v10, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsleu.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -905,8 +885,7 @@ define <8 x i1> @icmp_uge_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_uge_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsleu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -918,8 +897,7 @@ define <8 x i1> @icmp_uge_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_uge_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -929,8 +907,7 @@ define <8 x i1> @icmp_uge_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_uge_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsleu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -940,8 +917,7 @@ define <8 x i1> @icmp_ult_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_ult_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsltu.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -951,8 +927,7 @@ define <8 x i1> @icmp_ult_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_ult_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -964,8 +939,7 @@ define <8 x i1> @icmp_ult_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_ult_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgtu.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -977,8 +951,7 @@ define <8 x i1> @icmp_ult_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_ult_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsleu.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -988,8 +961,7 @@ define <8 x i1> @icmp_ult_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_ult_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -999,8 +971,7 @@ define <8 x i1> @icmp_sgt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_sgt_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmslt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmslt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sgt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1010,8 +981,7 @@ define <8 x i1> @icmp_sgt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_sgt_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgt.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1023,8 +993,7 @@ define <8 x i1> @icmp_sgt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_sgt_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1036,8 +1005,7 @@ define <8 x i1> @icmp_sgt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_sgt_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgt.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sgt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1047,8 +1015,7 @@ define <8 x i1> @icmp_sgt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_sgt_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sgt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1058,8 +1025,7 @@ define <8 x i1> @icmp_sge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_sge_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1069,10 +1035,9 @@ define <8 x i1> @icmp_sge_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_sge_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vv v10, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1084,8 +1049,7 @@ define <8 x i1> @icmp_sge_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_sge_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1097,8 +1061,7 @@ define <8 x i1> @icmp_sge_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_sge_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgt.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1108,8 +1071,7 @@ define <8 x i1> @icmp_sge_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_sge_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1119,8 +1081,7 @@ define <8 x i1> @icmp_slt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_slt_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmslt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmslt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"slt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1130,8 +1091,7 @@ define <8 x i1> @icmp_slt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_slt_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1143,8 +1103,7 @@ define <8 x i1> @icmp_slt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_slt_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgt.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1156,8 +1115,7 @@ define <8 x i1> @icmp_slt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_slt_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"slt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1167,8 +1125,7 @@ define <8 x i1> @icmp_slt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_slt_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgt.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"slt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1178,8 +1135,7 @@ define <8 x i1> @icmp_sle_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_sle_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sle", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1189,8 +1145,7 @@ define <8 x i1> @icmp_sle_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe
 ; CHECK-LABEL: icmp_sle_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vx v10, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1202,10 +1157,9 @@ define <8 x i1> @icmp_sle_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_sle_vx_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vv v10, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -1217,8 +1171,7 @@ define <8 x i1> @icmp_sle_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_sle_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsle.vi v10, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sle", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1228,8 +1181,7 @@ define <8 x i1> @icmp_sle_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_sle_vi_swap_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsgt.vi v10, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sle", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1261,7 +1213,7 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:  .LBB99_2:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v6, v8, v24, v0.t
+; CHECK-NEXT:    vmseq.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    addi a0, a2, -32
 ; CHECK-NEXT:    sltu a1, a2, a0
 ; CHECK-NEXT:    addi a1, a1, -1
@@ -1270,10 +1222,10 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m
 ; CHECK-NEXT:    addi a1, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v8, v24, v16, v0.t
+; CHECK-NEXT:    vmseq.vv v9, v24, v16, v0.t
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v6, v8, 4
-; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vslideup.vi v8, v9, 4
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
@@ -1297,17 +1249,17 @@ define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 ze
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:  .LBB100_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v8, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    addi a2, a1, -32
 ; CHECK-NEXT:    sltu a1, a1, a2
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    and a1, a1, a2
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v8, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v9, v16, a0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v25, v8, 4
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vslideup.vi v8, v9, 4
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <64 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <64 x i32> %elt.head, <64 x i32> poison, <64 x i32> zeroinitializer
@@ -1327,17 +1279,17 @@ define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:  .LBB101_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v8, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    addi a2, a1, -32
 ; CHECK-NEXT:    sltu a1, a1, a2
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    and a1, a1, a2
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v8, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v9, v16, a0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v25, v8, 4
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vslideup.vi v8, v9, 4
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <64 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <64 x i32> %elt.head, <64 x i32> poison, <64 x i32> zeroinitializer
@@ -1351,8 +1303,7 @@ define <8 x i1> @icmp_eq_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_eq_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmseq.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmseq.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"eq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1367,10 +1318,9 @@ define <8 x i1> @icmp_eq_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmseq.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmseq.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1378,8 +1328,7 @@ define <8 x i1> @icmp_eq_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex
 ; RV64-LABEL: icmp_eq_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmseq.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1396,10 +1345,9 @@ define <8 x i1> @icmp_eq_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmseq.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmseq.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1407,8 +1355,7 @@ define <8 x i1> @icmp_eq_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z
 ; RV64-LABEL: icmp_eq_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmseq.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1420,8 +1367,7 @@ define <8 x i1> @icmp_eq_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_eq_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmseq.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"eq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1431,8 +1377,7 @@ define <8 x i1> @icmp_eq_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: icmp_eq_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmseq.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"eq", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1442,8 +1387,7 @@ define <8 x i1> @icmp_ne_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32
 ; CHECK-LABEL: icmp_ne_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsne.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsne.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ne", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1458,10 +1402,9 @@ define <8 x i1> @icmp_ne_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsne.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsne.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1469,8 +1412,7 @@ define <8 x i1> @icmp_ne_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex
 ; RV64-LABEL: icmp_ne_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsne.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1487,10 +1429,9 @@ define <8 x i1> @icmp_ne_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsne.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsne.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1498,8 +1439,7 @@ define <8 x i1> @icmp_ne_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z
 ; RV64-LABEL: icmp_ne_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsne.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1511,8 +1451,7 @@ define <8 x i1> @icmp_ne_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_ne_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsne.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ne", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1522,8 +1461,7 @@ define <8 x i1> @icmp_ne_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: icmp_ne_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsne.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ne", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1533,8 +1471,7 @@ define <8 x i1> @icmp_ugt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_ugt_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsltu.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1549,10 +1486,9 @@ define <8 x i1> @icmp_ugt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsltu.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsltu.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1560,8 +1496,7 @@ define <8 x i1> @icmp_ugt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV64-LABEL: icmp_ugt_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsgtu.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1578,10 +1513,9 @@ define <8 x i1> @icmp_ugt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsltu.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsltu.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1589,8 +1523,7 @@ define <8 x i1> @icmp_ugt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV64-LABEL: icmp_ugt_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsltu.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1602,8 +1535,7 @@ define <8 x i1> @icmp_ugt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_ugt_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1613,8 +1545,7 @@ define <8 x i1> @icmp_ugt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_ugt_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ugt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1624,8 +1555,7 @@ define <8 x i1> @icmp_uge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_uge_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsleu.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1640,10 +1570,9 @@ define <8 x i1> @icmp_uge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsleu.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsleu.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1651,10 +1580,9 @@ define <8 x i1> @icmp_uge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV64-LABEL: icmp_uge_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmv.v.x v12, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsleu.vv v12, v16, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsleu.vv v0, v12, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1671,10 +1599,9 @@ define <8 x i1> @icmp_uge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsleu.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsleu.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1682,8 +1609,7 @@ define <8 x i1> @icmp_uge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV64-LABEL: icmp_uge_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsleu.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsleu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1695,8 +1621,7 @@ define <8 x i1> @icmp_uge_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_uge_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1706,8 +1631,7 @@ define <8 x i1> @icmp_uge_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_uge_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"uge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1717,8 +1641,7 @@ define <8 x i1> @icmp_ult_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_ult_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsltu.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1733,10 +1656,9 @@ define <8 x i1> @icmp_ult_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsltu.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsltu.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1744,8 +1666,7 @@ define <8 x i1> @icmp_ult_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV64-LABEL: icmp_ult_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsltu.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1762,10 +1683,9 @@ define <8 x i1> @icmp_ult_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsltu.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsltu.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1773,8 +1693,7 @@ define <8 x i1> @icmp_ult_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV64-LABEL: icmp_ult_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsgtu.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1786,8 +1705,7 @@ define <8 x i1> @icmp_ult_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_ult_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1797,8 +1715,7 @@ define <8 x i1> @icmp_ult_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_ult_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ult", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1808,8 +1725,7 @@ define <8 x i1> @icmp_sgt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_sgt_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmslt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmslt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sgt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1824,10 +1740,9 @@ define <8 x i1> @icmp_sgt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmslt.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmslt.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1835,8 +1750,7 @@ define <8 x i1> @icmp_sgt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV64-LABEL: icmp_sgt_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsgt.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1853,10 +1767,9 @@ define <8 x i1> @icmp_sgt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmslt.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmslt.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1864,8 +1777,7 @@ define <8 x i1> @icmp_sgt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV64-LABEL: icmp_sgt_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmslt.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1877,8 +1789,7 @@ define <8 x i1> @icmp_sgt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_sgt_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sgt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1888,8 +1799,7 @@ define <8 x i1> @icmp_sgt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_sgt_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sgt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1899,8 +1809,7 @@ define <8 x i1> @icmp_sge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_sge_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1915,10 +1824,9 @@ define <8 x i1> @icmp_sge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1926,10 +1834,9 @@ define <8 x i1> @icmp_sge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV64-LABEL: icmp_sge_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmv.v.x v12, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1946,10 +1853,9 @@ define <8 x i1> @icmp_sge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsle.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsle.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -1957,8 +1863,7 @@ define <8 x i1> @icmp_sge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV64-LABEL: icmp_sge_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsle.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -1970,8 +1875,7 @@ define <8 x i1> @icmp_sge_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_sge_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1981,8 +1885,7 @@ define <8 x i1> @icmp_sge_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_sge_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sge", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -1992,8 +1895,7 @@ define <8 x i1> @icmp_slt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_slt_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmslt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmslt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"slt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2008,10 +1910,9 @@ define <8 x i1> @icmp_slt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmslt.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmslt.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -2019,8 +1920,7 @@ define <8 x i1> @icmp_slt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV64-LABEL: icmp_slt_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmslt.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -2037,10 +1937,9 @@ define <8 x i1> @icmp_slt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmslt.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmslt.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -2048,8 +1947,7 @@ define <8 x i1> @icmp_slt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV64-LABEL: icmp_slt_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsgt.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -2061,8 +1959,7 @@ define <8 x i1> @icmp_slt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_slt_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"slt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2072,8 +1969,7 @@ define <8 x i1> @icmp_slt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_slt_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"slt", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2083,8 +1979,7 @@ define <8 x i1> @icmp_sle_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3
 ; CHECK-LABEL: icmp_sle_vv_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sle", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2099,10 +1994,9 @@ define <8 x i1> @icmp_sle_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsle.vv v12, v8, v16, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsle.vv v0, v8, v12, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -2110,8 +2004,7 @@ define <8 x i1> @icmp_sle_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
 ; RV64-LABEL: icmp_sle_vx_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsle.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -2128,10 +2021,9 @@ define <8 x i1> @icmp_sle_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -2139,10 +2031,9 @@ define <8 x i1> @icmp_sle_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
 ; RV64-LABEL: icmp_sle_vx_swap_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmv.v.x v12, a0
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -2154,8 +2045,7 @@ define <8 x i1> @icmp_sle_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: icmp_sle_vi_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sle", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
@@ -2165,8 +2055,7 @@ define <8 x i1> @icmp_sle_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
 ; CHECK-LABEL: icmp_sle_vi_swap_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sle", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll
index d8dc1f3588633..2f27b40378da1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll
@@ -128,8 +128,8 @@ define <4 x i64> @vsext_v4i64_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl
 ; CHECK-LABEL: vsext_v4i64_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsext.vf2 v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i64> %v
@@ -139,8 +139,8 @@ define <4 x i64> @vsext_v4i64_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vsext_v4i64_v4i32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %v = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
index c05f306424519..5e0f484b707c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
@@ -105,9 +105,9 @@ define <4 x i32> @v4i32_v16i32(<16 x i32>) {
 ; CHECK-NEXT:    vslidedown.vi v12, v12, 3, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vnsrl.wx v10, v8, a0
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
 ; CHECK-NEXT:    ret
   %2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 1, i32 9, i32 5, i32 14>
   ret <4 x i32> %2
@@ -338,8 +338,7 @@ define <32 x i8> @vnsrl_v32i8_v64i8(<64 x i8> %in) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 8
 ; CHECK-NEXT:    ret
   %res = shufflevector <64 x i8> %in, <64 x i8> poison, <32 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
   ret <32 x i8> %res
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
index b6267bf481c85..4078f2c895317 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
@@ -653,9 +653,9 @@ define void @vnsrl_0_i32_single_src_m8(ptr %in, ptr %out) {
 ; V-NEXT:    vle32.v v8, (a0)
 ; V-NEXT:    li a0, 32
 ; V-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; V-NEXT:    vnsrl.wi v16, v8, 0
+; V-NEXT:    vnsrl.wi v8, v8, 0
 ; V-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; V-NEXT:    vse32.v v16, (a1)
+; V-NEXT:    vse32.v v8, (a1)
 ; V-NEXT:    ret
 ;
 ; ZVE32F-LABEL: vnsrl_0_i32_single_src_m8:
@@ -680,9 +680,9 @@ define void @vnsrl_0_i32_single_src_m8(ptr %in, ptr %out) {
 ; ZIP-NEXT:    vle32.v v8, (a0)
 ; ZIP-NEXT:    li a0, 32
 ; ZIP-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZIP-NEXT:    vnsrl.wi v16, v8, 0
+; ZIP-NEXT:    vnsrl.wi v8, v8, 0
 ; ZIP-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; ZIP-NEXT:    vse32.v v16, (a1)
+; ZIP-NEXT:    vse32.v v8, (a1)
 ; ZIP-NEXT:    ret
 entry:
   %0 = load <64 x i32>, ptr %in, align 4
@@ -699,9 +699,9 @@ define void @vnsrl_0_i32_single_src_m8_2(ptr %in, ptr %out) {
 ; V-NEXT:    vle32.v v8, (a0)
 ; V-NEXT:    li a0, 32
 ; V-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; V-NEXT:    vnsrl.wi v16, v8, 0
+; V-NEXT:    vnsrl.wi v8, v8, 0
 ; V-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; V-NEXT:    vse32.v v16, (a1)
+; V-NEXT:    vse32.v v8, (a1)
 ; V-NEXT:    ret
 ;
 ; ZVE32F-LABEL: vnsrl_0_i32_single_src_m8_2:
@@ -725,9 +725,9 @@ define void @vnsrl_0_i32_single_src_m8_2(ptr %in, ptr %out) {
 ; ZIP-NEXT:    vle32.v v8, (a0)
 ; ZIP-NEXT:    li a0, 32
 ; ZIP-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; ZIP-NEXT:    vnsrl.wi v16, v8, 0
+; ZIP-NEXT:    vnsrl.wi v8, v8, 0
 ; ZIP-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; ZIP-NEXT:    vse32.v v16, (a1)
+; ZIP-NEXT:    vse32.v v8, (a1)
 ; ZIP-NEXT:    ret
 entry:
   %0 = load <64 x i32>, ptr %in, align 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
index 9629b3547b3d0..b111d3592769b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
@@ -70,10 +70,10 @@ define <8 x i32> @m2_pair_swap_vl8(<8 x i32> %v1) vscale_range(2,2) {
 ; RV32-LABEL: m2_pair_swap_vl8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v12, 0
+; RV32-NEXT:    vmv.v.i v11, 0
 ; RV32-NEXT:    li a0, 32
 ; RV32-NEXT:    li a1, 63
-; RV32-NEXT:    vwsubu.vx v10, v12, a0
+; RV32-NEXT:    vwsubu.vx v10, v11, a0
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a0
 ; RV32-NEXT:    vand.vx v10, v10, a1
@@ -338,10 +338,10 @@ define i64 @multi_chunks_shuffle(<32 x i32> %0) vscale_range(8,8) {
 ; RV32-LABEL: multi_chunks_shuffle:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v12, 0
+; RV32-NEXT:    vmv.v.i v11, 0
 ; RV32-NEXT:    li a0, 32
 ; RV32-NEXT:    li a1, 63
-; RV32-NEXT:    vwsubu.vx v10, v12, a0
+; RV32-NEXT:    vwsubu.vx v10, v11, a0
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a0
 ; RV32-NEXT:    lui a0, 61681
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp-interleave.ll
index e6eabe0d81309..83ea3b9df70c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp-interleave.ll
@@ -91,8 +91,8 @@ define <8 x float> @interleave_v4f32(<4 x float> %x, <4 x float> %y) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; V128-NEXT:    vmv1r.v v10, v9
-; V128-NEXT:    vmv1r.v v11, v8
-; V128-NEXT:    vwaddu.vv v8, v11, v10
+; V128-NEXT:    vmv1r.v v9, v8
+; V128-NEXT:    vwaddu.vv v8, v9, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v10
 ; V128-NEXT:    ret
@@ -114,11 +114,10 @@ define <16 x half> @interleave_v8f16(<8 x half> %x, <8 x half> %y) {
 ; V128-LABEL: interleave_v8f16:
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; V128-NEXT:    vmv1r.v v10, v9
-; V128-NEXT:    vmv1r.v v11, v8
-; V128-NEXT:    vwaddu.vv v8, v10, v11
+; V128-NEXT:    vmv1r.v v10, v8
+; V128-NEXT:    vwaddu.vv v8, v9, v10
 ; V128-NEXT:    li a0, -1
-; V128-NEXT:    vwmaccu.vx v8, a0, v11
+; V128-NEXT:    vwmaccu.vx v8, a0, v10
 ; V128-NEXT:    ret
 ;
 ; V512-LABEL: interleave_v8f16:
@@ -138,8 +137,8 @@ define <16 x float> @interleave_v8f32(<8 x float> %x, <8 x float> %y) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; V128-NEXT:    vmv2r.v v12, v10
-; V128-NEXT:    vmv2r.v v14, v8
-; V128-NEXT:    vwaddu.vv v8, v14, v12
+; V128-NEXT:    vmv2r.v v10, v8
+; V128-NEXT:    vwaddu.vv v8, v10, v12
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v12
 ; V128-NEXT:    ret
@@ -161,8 +160,8 @@ define <32 x half> @interleave_v16f16(<16 x half> %x, <16 x half> %y) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; V128-NEXT:    vmv2r.v v12, v10
-; V128-NEXT:    vmv2r.v v14, v8
-; V128-NEXT:    vwaddu.vv v8, v14, v12
+; V128-NEXT:    vmv2r.v v10, v8
+; V128-NEXT:    vwaddu.vv v8, v10, v12
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v12
 ; V128-NEXT:    ret
@@ -184,8 +183,8 @@ define <32 x float> @interleave_v16f32(<16 x float> %x, <16 x float> %y) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; V128-NEXT:    vmv4r.v v16, v12
-; V128-NEXT:    vmv4r.v v20, v8
-; V128-NEXT:    vwaddu.vv v8, v20, v16
+; V128-NEXT:    vmv4r.v v12, v8
+; V128-NEXT:    vwaddu.vv v8, v12, v16
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v16
 ; V128-NEXT:    ret
@@ -194,8 +193,8 @@ define <32 x float> @interleave_v16f32(<16 x float> %x, <16 x float> %y) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
 ; V512-NEXT:    vmv1r.v v10, v9
-; V512-NEXT:    vmv1r.v v11, v8
-; V512-NEXT:    vwaddu.vv v8, v11, v10
+; V512-NEXT:    vmv1r.v v9, v8
+; V512-NEXT:    vwaddu.vv v8, v9, v10
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v8, a0, v10
 ; V512-NEXT:    ret
@@ -208,10 +207,10 @@ define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; V128-NEXT:    vmv4r.v v16, v12
-; V128-NEXT:    vmv4r.v v20, v8
+; V128-NEXT:    vmv4r.v v12, v8
 ; V128-NEXT:    li a0, 32
 ; V128-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; V128-NEXT:    vwaddu.vv v8, v20, v16
+; V128-NEXT:    vwaddu.vv v8, v12, v16
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v16
 ; V128-NEXT:    ret
@@ -220,10 +219,10 @@ define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; V512-NEXT:    vmv1r.v v10, v9
-; V512-NEXT:    vmv1r.v v11, v8
+; V512-NEXT:    vmv1r.v v9, v8
 ; V512-NEXT:    li a0, 32
 ; V512-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; V512-NEXT:    vwaddu.vv v8, v11, v10
+; V512-NEXT:    vwaddu.vv v8, v9, v10
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v8, a0, v10
 ; V512-NEXT:    ret
@@ -275,10 +274,10 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; V512-NEXT:    vmv2r.v v12, v10
-; V512-NEXT:    vmv2r.v v14, v8
+; V512-NEXT:    vmv2r.v v10, v8
 ; V512-NEXT:    li a0, 32
 ; V512-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; V512-NEXT:    vwaddu.vv v8, v14, v12
+; V512-NEXT:    vwaddu.vv v8, v10, v12
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v8, a0, v12
 ; V512-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll
index f580b1b993395..d689fb94f4a34 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll
@@ -435,13 +435,13 @@ define <16 x float> @shuffle_disjoint_lanes(<16 x float> %v, <16 x float> %w) {
 ; CHECK-NEXT:    lui a0, %hi(.LCPI34_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI34_0)
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle8.v v18, (a0)
+; CHECK-NEXT:    vle8.v v17, (a0)
 ; CHECK-NEXT:    lui a0, 11
 ; CHECK-NEXT:    addi a0, a0, -1366
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v18
+; CHECK-NEXT:    vsext.vf2 v16, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
index 917613d5c786f..a65190b93b621 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
@@ -172,8 +172,8 @@ define <8 x i32> @interleave_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; V128-NEXT:    vmv1r.v v10, v9
-; V128-NEXT:    vmv1r.v v11, v8
-; V128-NEXT:    vwaddu.vv v8, v11, v10
+; V128-NEXT:    vmv1r.v v9, v8
+; V128-NEXT:    vwaddu.vv v8, v9, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v10
 ; V128-NEXT:    ret
@@ -309,11 +309,10 @@ define <16 x i16> @interleave_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; V128-LABEL: interleave_v8i16:
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; V128-NEXT:    vmv1r.v v10, v9
-; V128-NEXT:    vmv1r.v v11, v8
-; V128-NEXT:    vwaddu.vv v8, v10, v11
+; V128-NEXT:    vmv1r.v v10, v8
+; V128-NEXT:    vwaddu.vv v8, v9, v10
 ; V128-NEXT:    li a0, -1
-; V128-NEXT:    vwmaccu.vx v8, a0, v11
+; V128-NEXT:    vwmaccu.vx v8, a0, v10
 ; V128-NEXT:    ret
 ;
 ; V512-LABEL: interleave_v8i16:
@@ -341,8 +340,8 @@ define <16 x i32> @interleave_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; V128-NEXT:    vmv2r.v v12, v10
-; V128-NEXT:    vmv2r.v v14, v8
-; V128-NEXT:    vwaddu.vv v8, v14, v12
+; V128-NEXT:    vmv2r.v v10, v8
+; V128-NEXT:    vwaddu.vv v8, v10, v12
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v12
 ; V128-NEXT:    ret
@@ -372,8 +371,8 @@ define <32 x i8> @interleave_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; V128-NEXT:    vmv1r.v v10, v9
-; V128-NEXT:    vmv1r.v v11, v8
-; V128-NEXT:    vwaddu.vv v8, v11, v10
+; V128-NEXT:    vmv1r.v v9, v8
+; V128-NEXT:    vwaddu.vv v8, v9, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v10
 ; V128-NEXT:    ret
@@ -405,8 +404,8 @@ define <32 x i16> @interleave_v16i16(<16 x i16> %x, <16 x i16> %y) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; V128-NEXT:    vmv2r.v v12, v10
-; V128-NEXT:    vmv2r.v v14, v8
-; V128-NEXT:    vwaddu.vv v8, v14, v12
+; V128-NEXT:    vmv2r.v v10, v8
+; V128-NEXT:    vwaddu.vv v8, v10, v12
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v12
 ; V128-NEXT:    ret
@@ -438,8 +437,8 @@ define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; V128-NEXT:    vmv4r.v v16, v12
-; V128-NEXT:    vmv4r.v v20, v8
-; V128-NEXT:    vwaddu.vv v8, v20, v16
+; V128-NEXT:    vmv4r.v v12, v8
+; V128-NEXT:    vwaddu.vv v8, v12, v16
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v16
 ; V128-NEXT:    ret
@@ -448,8 +447,8 @@ define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
 ; V512-NEXT:    vmv1r.v v10, v9
-; V512-NEXT:    vmv1r.v v11, v8
-; V512-NEXT:    vwaddu.vv v8, v11, v10
+; V512-NEXT:    vmv1r.v v9, v8
+; V512-NEXT:    vwaddu.vv v8, v9, v10
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v8, a0, v10
 ; V512-NEXT:    ret
@@ -472,10 +471,10 @@ define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; V128-NEXT:    vmv2r.v v12, v10
-; V128-NEXT:    vmv2r.v v14, v8
+; V128-NEXT:    vmv2r.v v10, v8
 ; V128-NEXT:    li a0, 32
 ; V128-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; V128-NEXT:    vwaddu.vv v8, v14, v12
+; V128-NEXT:    vwaddu.vv v8, v10, v12
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v12
 ; V128-NEXT:    ret
@@ -508,10 +507,10 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) {
 ; V128:       # %bb.0:
 ; V128-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; V128-NEXT:    vmv4r.v v16, v12
-; V128-NEXT:    vmv4r.v v20, v8
+; V128-NEXT:    vmv4r.v v12, v8
 ; V128-NEXT:    li a0, 32
 ; V128-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; V128-NEXT:    vwaddu.vv v8, v20, v16
+; V128-NEXT:    vwaddu.vv v8, v12, v16
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v16
 ; V128-NEXT:    ret
@@ -520,10 +519,10 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; V512-NEXT:    vmv1r.v v10, v9
-; V512-NEXT:    vmv1r.v v11, v8
+; V512-NEXT:    vmv1r.v v9, v8
 ; V512-NEXT:    li a0, 32
 ; V512-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; V512-NEXT:    vwaddu.vv v8, v11, v10
+; V512-NEXT:    vwaddu.vv v8, v9, v10
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v8, a0, v10
 ; V512-NEXT:    ret
@@ -585,10 +584,10 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
 ; V512:       # %bb.0:
 ; V512-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; V512-NEXT:    vmv2r.v v12, v10
-; V512-NEXT:    vmv2r.v v14, v8
+; V512-NEXT:    vmv2r.v v10, v8
 ; V512-NEXT:    li a0, 32
 ; V512-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; V512-NEXT:    vwaddu.vv v8, v14, v12
+; V512-NEXT:    vwaddu.vv v8, v10, v12
 ; V512-NEXT:    li a0, -1
 ; V512-NEXT:    vwmaccu.vx v8, a0, v12
 ; V512-NEXT:    ret
@@ -988,8 +987,8 @@ define <16 x i16> @interleave_slp(<8 x i16> %v0, <8 x i16> %v1) {
 ; V128:       # %bb.0: # %entry
 ; V128-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V128-NEXT:    vmv1r.v v10, v9
-; V128-NEXT:    vmv1r.v v11, v8
-; V128-NEXT:    vwaddu.vv v8, v11, v10
+; V128-NEXT:    vmv1r.v v9, v8
+; V128-NEXT:    vwaddu.vv v8, v9, v10
 ; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v8, a0, v10
 ; V128-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
index 8676803e20e3b..bafcbfc99a431 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
@@ -705,9 +705,9 @@ define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
 ; CHECK-LABEL: shuffle_v64i8_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1011,13 +1011,13 @@ define <16 x i32> @shuffle_disjoint_lanes(<16 x i32> %v, <16 x i32> %w) {
 ; CHECK-NEXT:    lui a0, %hi(.LCPI70_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI70_0)
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle8.v v18, (a0)
+; CHECK-NEXT:    vle8.v v17, (a0)
 ; CHECK-NEXT:    lui a0, 11
 ; CHECK-NEXT:    addi a0, a0, -1366
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v18
+; CHECK-NEXT:    vsext.vf2 v16, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
index fe2072990e2ac..554702e16060a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
@@ -615,10 +615,10 @@ define <8 x i32> @shuffle_v8i32_as_i64(<8 x i32> %v) {
 ; RV32-LABEL: shuffle_v8i32_as_i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v12, 0
+; RV32-NEXT:    vmv.v.i v11, 0
 ; RV32-NEXT:    li a0, 32
 ; RV32-NEXT:    li a1, 63
-; RV32-NEXT:    vwsubu.vx v10, v12, a0
+; RV32-NEXT:    vwsubu.vx v10, v11, a0
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a0
 ; RV32-NEXT:    vand.vx v10, v10, a1
@@ -819,10 +819,10 @@ define <8 x float> @shuffle_v8f32_as_i64(<8 x float> %v) {
 ; RV32-LABEL: shuffle_v8f32_as_i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v12, 0
+; RV32-NEXT:    vmv.v.i v11, 0
 ; RV32-NEXT:    li a0, 32
 ; RV32-NEXT:    li a1, 63
-; RV32-NEXT:    vwsubu.vx v10, v12, a0
+; RV32-NEXT:    vwsubu.vx v10, v11, a0
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a0
 ; RV32-NEXT:    vand.vx v10, v10, a1
@@ -864,10 +864,10 @@ define <8 x float> @shuffle_v8f32_as_i64_exact(<8 x float> %v) vscale_range(2,2)
 ; RV32-LABEL: shuffle_v8f32_as_i64_exact:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v12, 0
+; RV32-NEXT:    vmv.v.i v11, 0
 ; RV32-NEXT:    li a0, 32
 ; RV32-NEXT:    li a1, 63
-; RV32-NEXT:    vwsubu.vx v10, v12, a0
+; RV32-NEXT:    vwsubu.vx v10, v11, a0
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a0
 ; RV32-NEXT:    vand.vx v10, v10, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll
index 9f1f98893d04c..9c179f9b5fa72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll
@@ -27,9 +27,9 @@ define <4 x half> @vsitofp_v4f16_v4i7(<4 x i7> %va, <4 x i1> %m, i32 zeroext %ev
 ; ZVFHMIN-NEXT:    vsra.vi v8, v8, 1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v9, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i7(<4 x i7> %va, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -49,9 +49,9 @@ define <4 x half> @vsitofp_v4f16_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %ev
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v9, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -69,9 +69,9 @@ define <4 x half> @vsitofp_v4f16_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -121,16 +121,15 @@ define <4 x half> @vsitofp_v4f16_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %
 ; ZVFH-LABEL: vsitofp_v4f16_v4i32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v9, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_v4f16_v4i32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v9, v8, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -140,16 +139,15 @@ define <4 x half> @vsitofp_v4f16_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl)
 ; ZVFH-LABEL: vsitofp_v4f16_v4i32_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_v4f16_v4i32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v9, v8
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -161,17 +159,17 @@ define <4 x half> @vsitofp_v4f16_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %
 ; ZVFH-LABEL: vsitofp_v4f16_v4i64:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v10, v8, v0.t
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFH-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_v4f16_v4i64:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.x.w v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.x.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -181,17 +179,17 @@ define <4 x half> @vsitofp_v4f16_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl)
 ; ZVFH-LABEL: vsitofp_v4f16_v4i64_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v10, v8
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_v4f16_v4i64_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.x.w v10, v8
+; ZVFHMIN-NEXT:    vfncvt.f.x.w v8, v8
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -273,8 +271,7 @@ define <4 x float> @vsitofp_v4f32_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vsitofp_v4f32_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x float> @llvm.vp.sitofp.v4f32.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
   ret <4 x float> %v
@@ -284,8 +281,7 @@ define <4 x float> @vsitofp_v4f32_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl
 ; CHECK-LABEL: vsitofp_v4f32_v4i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <4 x float> @llvm.vp.sitofp.v4f32.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x float> %v
@@ -297,8 +293,8 @@ define <4 x double> @vsitofp_v4f64_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: vsitofp_v4f64_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10, v0.t
+; CHECK-NEXT:    vsext.vf4 v9, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -308,8 +304,8 @@ define <4 x double> @vsitofp_v4f64_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl)
 ; CHECK-LABEL: vsitofp_v4f64_v4i8_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf4 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x double> %v
@@ -321,8 +317,8 @@ define <4 x double> @vsitofp_v4f64_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vsitofp_v4f64_v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10, v0.t
+; CHECK-NEXT:    vsext.vf2 v9, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -332,8 +328,8 @@ define <4 x double> @vsitofp_v4f64_v4i16_unmasked(<4 x i16> %va, i32 zeroext %ev
 ; CHECK-LABEL: vsitofp_v4f64_v4i16_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x double> %v
@@ -345,8 +341,8 @@ define <4 x double> @vsitofp_v4f64_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vsitofp_v4f64_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -356,8 +352,8 @@ define <4 x double> @vsitofp_v4f64_v4i32_unmasked(<4 x i32> %va, i32 zeroext %ev
 ; CHECK-LABEL: vsitofp_v4f64_v4i32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
index 3b1dc298c12ce..9fad63d870af7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
@@ -274,8 +274,8 @@ define void @trunc_sat_i32i64_notopt(ptr %x, ptr %y) {
 ; CHECK-NEXT:    slli a0, a0, 31
 ; CHECK-NEXT:    vmin.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vse32.v v10, (a1)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 64
   %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -2147483647, i64 -2147483647, i64 -2147483647, i64 -2147483647>)
@@ -290,8 +290,8 @@ define void @trunc_sat_i32i64_maxmin(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vnclip.wi v10, v8, 0
-; CHECK-NEXT:    vse32.v v10, (a1)
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
+; CHECK-NEXT:    vse32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 64
   %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
@@ -306,8 +306,8 @@ define void @trunc_sat_i32i64_minmax(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vnclip.wi v10, v8, 0
-; CHECK-NEXT:    vse32.v v10, (a1)
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
+; CHECK-NEXT:    vse32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 64
   %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
@@ -327,8 +327,8 @@ define void @trunc_sat_u32u64_notopt(ptr %x, ptr %y) {
 ; CHECK-NEXT:    addiw a0, a0, -1
 ; CHECK-NEXT:    vminu.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vse32.v v10, (a1)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 64
   %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
@@ -342,8 +342,8 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
-; CHECK-NEXT:    vse32.v v10, (a1)
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    vse32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 64
   %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
@@ -360,8 +360,8 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
-; CHECK-NEXT:    vse32.v v10, (a1)
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    vse32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 16
   %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> zeroinitializer)
@@ -378,8 +378,8 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
-; CHECK-NEXT:    vse32.v v10, (a1)
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    vse32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 16
   %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
@@ -487,9 +487,9 @@ define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vnclip.wi v10, v8, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v10, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -507,9 +507,9 @@ define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vnclip.wi v10, v8, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v10, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -527,9 +527,9 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -548,9 +548,9 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -570,9 +570,9 @@ define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -590,9 +590,9 @@ define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vnclip.wi v10, v8, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v10, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 32
@@ -608,9 +608,9 @@ define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vnclip.wi v10, v8, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v10, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 32
@@ -630,9 +630,9 @@ define void @trunc_sat_u16u64_notopt(ptr %x, ptr %y) {
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    vminu.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 32
@@ -647,9 +647,9 @@ define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 32
@@ -667,9 +667,9 @@ define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    li a0, 1
 ; CHECK-NEXT:    vmax.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 16
@@ -688,9 +688,9 @@ define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    li a0, 50
 ; CHECK-NEXT:    vmax.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
index 1267bcd1e0717..f5e3749a74aaa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
@@ -53,27 +53,26 @@ declare <128 x i7> @llvm.vp.trunc.v128i7.v128i16(<128 x i16>, <128 x i1>, i32)
 define <128 x i7> @vtrunc_v128i7_v128i16(<128 x i16> %a, <128 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vtrunc_v128i7_v128i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    li a1, 64
-; CHECK-NEXT:    vslidedown.vi v12, v0, 8
+; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v24, v0, 8
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    bltu a0, a1, .LBB4_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:  .LBB4_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v24, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    addi a2, a0, -64
 ; CHECK-NEXT:    sltu a0, a0, a2
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a2
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v24, v16, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v16, v16, 0, v0.t
 ; CHECK-NEXT:    li a0, 128
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT:    vslideup.vx v8, v24, a1
+; CHECK-NEXT:    vslideup.vx v8, v16, a1
 ; CHECK-NEXT:    ret
   %v = call <128 x i7> @llvm.vp.trunc.v128i7.v128i16(<128 x i16> %a, <128 x i1> %m, i32 %vl)
   ret <128 x i7> %v
@@ -189,9 +188,9 @@ define <15 x i16> @vtrunc_v15i16_v15i64(<15 x i64> %a, <15 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vtrunc_v15i16_v15i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <15 x i16> @llvm.vp.trunc.v15i16.v15i64(<15 x i64> %a, <15 x i1> %m, i32 %vl)
   ret <15 x i16> %v
@@ -229,20 +228,19 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; RV32-NEXT:    sw s0, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset s0, -4
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    li a3, 72
-; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    slli a2, a2, 6
 ; RV32-NEXT:    sub sp, sp, a2
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 72 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 64 * vlenb
 ; RV32-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
 ; RV32-NEXT:    vmv1r.v v7, v0
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a2, a2, a3
 ; RV32-NEXT:    add a2, sp, a2
 ; RV32-NEXT:    addi a2, a2, 16
 ; RV32-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    li a3, 40
-; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    slli a2, a2, 5
 ; RV32-NEXT:    add a2, sp, a2
 ; RV32-NEXT:    addi a2, a2, 16
 ; RV32-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
@@ -297,27 +295,24 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; RV32-NEXT:    vle64.v v8, (t6)
 ; RV32-NEXT:    csrr t6, vlenb
 ; RV32-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32-NEXT:    li a0, 56
+; RV32-NEXT:    li a0, 40
 ; RV32-NEXT:    mul t6, t6, a0
 ; RV32-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    add t6, sp, t6
 ; RV32-NEXT:    addi t6, t6, 16
 ; RV32-NEXT:    vs8r.v v8, (t6) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vle64.v v8, (t5)
-; RV32-NEXT:    vle64.v v16, (t2)
-; RV32-NEXT:    vle64.v v24, (a1)
+; RV32-NEXT:    vle64.v v24, (t5)
+; RV32-NEXT:    vle64.v v8, (t2)
+; RV32-NEXT:    vle64.v v16, (a1)
 ; RV32-NEXT:    csrr t2, vlenb
 ; RV32-NEXT:    li t5, 48
 ; RV32-NEXT:    mul t2, t2, t5
 ; RV32-NEXT:    add t2, sp, t2
 ; RV32-NEXT:    addi t2, t2, 16
-; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    vle64.v v24, (t3)
-; RV32-NEXT:    csrr t2, vlenb
-; RV32-NEXT:    slli t2, t2, 3
-; RV32-NEXT:    add t2, sp, t2
-; RV32-NEXT:    addi t2, t2, 16
-; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vs8r.v v16, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vle64.v v16, (t3)
+; RV32-NEXT:    addi t2, sp, 16
+; RV32-NEXT:    vs8r.v v16, (t2) # vscale x 64-byte Folded Spill
 ; RV32-NEXT:    and t2, t1, t0
 ; RV32-NEXT:    and t1, s0, t4
 ; RV32-NEXT:    addi a1, a1, 256
@@ -327,32 +322,27 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; RV32-NEXT:    li t0, 32
 ; RV32-NEXT:  .LBB16_8:
 ; RV32-NEXT:    vsetvli zero, t2, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v24, v8, 0, v0.t
-; RV32-NEXT:    csrr t2, vlenb
-; RV32-NEXT:    li t3, 24
-; RV32-NEXT:    mul t2, t2, t3
-; RV32-NEXT:    add t2, sp, t2
-; RV32-NEXT:    addi t2, t2, 16
-; RV32-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vnsrl.wi v24, v24, 0, v0.t
 ; RV32-NEXT:    vmv1r.v v0, v3
 ; RV32-NEXT:    csrr t2, vlenb
-; RV32-NEXT:    li t3, 56
+; RV32-NEXT:    li t3, 40
 ; RV32-NEXT:    mul t2, t2, t3
 ; RV32-NEXT:    add t2, sp, t2
 ; RV32-NEXT:    addi t2, t2, 16
-; RV32-NEXT:    vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vl8r.v v16, (t2) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetvli zero, a5, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v24, 0, v0.t
+; RV32-NEXT:    vnsrl.wi v16, v16, 0, v0.t
 ; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 6
+; RV32-NEXT:    li t2, 56
+; RV32-NEXT:    mul a5, a5, t2
 ; RV32-NEXT:    add a5, sp, a5
 ; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vs8r.v v16, (a5) # vscale x 64-byte Folded Spill
 ; RV32-NEXT:    vmv1r.v v0, v6
 ; RV32-NEXT:    vsetvli zero, t1, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v16, 0, v0.t
+; RV32-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 4
+; RV32-NEXT:    slli a5, a5, 3
 ; RV32-NEXT:    add a5, sp, a5
 ; RV32-NEXT:    addi a5, a5, 16
 ; RV32-NEXT:    vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
@@ -361,9 +351,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; RV32-NEXT:    addi t0, t0, -1
 ; RV32-NEXT:    and a5, t0, a5
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vle64.v v8, (a1)
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vle64.v v16, (a1)
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; RV32-NEXT:    vslidedown.vi v30, v7, 2
 ; RV32-NEXT:    vmv1r.v v0, v4
@@ -372,130 +360,125 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; RV32-NEXT:    mul a1, a1, t0
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetvli zero, a6, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v16, 0, v0.t
+; RV32-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a6, 56
-; RV32-NEXT:    mul a1, a1, a6
+; RV32-NEXT:    slli a1, a1, 4
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; RV32-NEXT:    vmv1r.v v0, v2
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vsetvli zero, a5, e32, m4, ta, ma
+; RV32-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    li a5, 40
+; RV32-NEXT:    mul a1, a1, a5
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vsetvli zero, a5, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v16, v8, 0, v0.t
+; RV32-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; RV32-NEXT:    bltu a4, a2, .LBB16_10
 ; RV32-NEXT:  # %bb.9:
 ; RV32-NEXT:    li a4, 16
 ; RV32-NEXT:  .LBB16_10:
 ; RV32-NEXT:    vmv1r.v v0, v5
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetvli zero, a4, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v24, v8, 0, v0.t
+; RV32-NEXT:    vnsrl.wi v16, v16, 0, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    li a4, 48
 ; RV32-NEXT:    mul a1, a1, a4
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; RV32-NEXT:    mv a1, a7
 ; RV32-NEXT:    bltu a7, a3, .LBB16_12
 ; RV32-NEXT:  # %bb.11:
 ; RV32-NEXT:    li a1, 32
 ; RV32-NEXT:  .LBB16_12:
 ; RV32-NEXT:    vmv1r.v v0, v30
+; RV32-NEXT:    vmv4r.v v8, v24
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 24
-; RV32-NEXT:    mul a4, a4, a5
+; RV32-NEXT:    slli a4, a4, 3
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vl8r.v v24, (a4) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vmv4r.v v8, v24
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 4
+; RV32-NEXT:    li a5, 40
+; RV32-NEXT:    mul a4, a4, a5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vl8r.v v24, (a4) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 24
+; RV32-NEXT:    li a5, 40
 ; RV32-NEXT:    mul a4, a4, a5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 6
+; RV32-NEXT:    li a5, 56
+; RV32-NEXT:    mul a4, a4, a5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
 ; RV32-NEXT:    vslideup.vi v16, v8, 16
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 6
+; RV32-NEXT:    li a5, 56
+; RV32-NEXT:    mul a4, a4, a5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; RV32-NEXT:    addi a4, a1, -16
 ; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    li a6, 56
-; RV32-NEXT:    mul a5, a5, a6
+; RV32-NEXT:    slli a5, a5, 4
 ; RV32-NEXT:    add a5, sp, a5
 ; RV32-NEXT:    addi a5, a5, 16
 ; RV32-NEXT:    vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vslideup.vi v16, v24, 16
 ; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    li a6, 56
-; RV32-NEXT:    mul a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs8r.v v16, (a5) # vscale x 64-byte Folded Spill
-; RV32-NEXT:    csrr a5, vlenb
 ; RV32-NEXT:    li a6, 48
 ; RV32-NEXT:    mul a5, a5, a6
 ; RV32-NEXT:    add a5, sp, a5
 ; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    li a6, 24
+; RV32-NEXT:    li a6, 40
 ; RV32-NEXT:    mul a5, a5, a6
 ; RV32-NEXT:    add a5, sp, a5
 ; RV32-NEXT:    addi a5, a5, 16
 ; RV32-NEXT:    vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vslideup.vi v16, v8, 16
+; RV32-NEXT:    vslideup.vi v24, v8, 16
 ; RV32-NEXT:    csrr a5, vlenb
 ; RV32-NEXT:    li a6, 48
 ; RV32-NEXT:    mul a5, a5, a6
 ; RV32-NEXT:    add a5, sp, a5
 ; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs8r.v v16, (a5) # vscale x 64-byte Folded Spill
+; RV32-NEXT:    vs8r.v v24, (a5) # vscale x 64-byte Folded Spill
 ; RV32-NEXT:    sltu a1, a1, a4
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    and a1, a1, a4
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 5
+; RV32-NEXT:    li a5, 24
+; RV32-NEXT:    mul a4, a4, a5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vl8r.v v8, (a4) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v16, 0, v0.t
+; RV32-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; RV32-NEXT:    bltu a7, a2, .LBB16_14
 ; RV32-NEXT:  # %bb.13:
 ; RV32-NEXT:    li a7, 16
 ; RV32-NEXT:  .LBB16_14:
 ; RV32-NEXT:    vmv1r.v v0, v7
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 40
-; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    slli a1, a1, 5
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetvli zero, a7, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v24, v16, 0, v0.t
+; RV32-NEXT:    vnsrl.wi v24, v24, 0, v0.t
 ; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
 ; RV32-NEXT:    vslideup.vi v24, v8, 16
 ; RV32-NEXT:    vse32.v v24, (a0)
@@ -508,23 +491,17 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; RV32-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vse32.v v8, (a1)
 ; RV32-NEXT:    addi a1, a0, 128
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    li a3, 56
-; RV32-NEXT:    mul a2, a2, a3
-; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 16
-; RV32-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; RV32-NEXT:    vse32.v v8, (a1)
+; RV32-NEXT:    vse32.v v16, (a1)
 ; RV32-NEXT:    addi a0, a0, 384
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 6
+; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vse32.v v8, (a0)
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 72
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 6
 ; RV32-NEXT:    add sp, sp, a0
 ; RV32-NEXT:    .cfi_def_cfa sp, 32
 ; RV32-NEXT:    lw s0, 28(sp) # 4-byte Folded Reload
@@ -540,20 +517,19 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; RV64-NEXT:    sd s0, 40(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset s0, -8
 ; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    li a3, 72
-; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    slli a2, a2, 6
 ; RV64-NEXT:    sub sp, sp, a2
-; RV64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 72 * vlenb
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 64 * vlenb
 ; RV64-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
 ; RV64-NEXT:    vmv1r.v v7, v0
 ; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
 ; RV64-NEXT:    add a2, sp, a2
 ; RV64-NEXT:    addi a2, a2, 32
 ; RV64-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    li a3, 40
-; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    slli a2, a2, 5
 ; RV64-NEXT:    add a2, sp, a2
 ; RV64-NEXT:    addi a2, a2, 32
 ; RV64-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
@@ -608,27 +584,24 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; RV64-NEXT:    vle64.v v8, (t6)
 ; RV64-NEXT:    csrr t6, vlenb
 ; RV64-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    li a0, 40
 ; RV64-NEXT:    mul t6, t6, a0
 ; RV64-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    add t6, sp, t6
 ; RV64-NEXT:    addi t6, t6, 32
 ; RV64-NEXT:    vs8r.v v8, (t6) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vle64.v v8, (t5)
-; RV64-NEXT:    vle64.v v16, (t2)
-; RV64-NEXT:    vle64.v v24, (a1)
+; RV64-NEXT:    vle64.v v24, (t5)
+; RV64-NEXT:    vle64.v v8, (t2)
+; RV64-NEXT:    vle64.v v16, (a1)
 ; RV64-NEXT:    csrr t2, vlenb
 ; RV64-NEXT:    li t5, 48
 ; RV64-NEXT:    mul t2, t2, t5
 ; RV64-NEXT:    add t2, sp, t2
 ; RV64-NEXT:    addi t2, t2, 32
-; RV64-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    vle64.v v24, (t3)
-; RV64-NEXT:    csrr t2, vlenb
-; RV64-NEXT:    slli t2, t2, 3
-; RV64-NEXT:    add t2, sp, t2
-; RV64-NEXT:    addi t2, t2, 32
-; RV64-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vs8r.v v16, (t2) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vle64.v v16, (t3)
+; RV64-NEXT:    addi t2, sp, 32
+; RV64-NEXT:    vs8r.v v16, (t2) # vscale x 64-byte Folded Spill
 ; RV64-NEXT:    and t2, t1, t0
 ; RV64-NEXT:    and t1, s0, t4
 ; RV64-NEXT:    addi a1, a1, 256
@@ -638,32 +611,27 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; RV64-NEXT:    li t0, 32
 ; RV64-NEXT:  .LBB16_8:
 ; RV64-NEXT:    vsetvli zero, t2, e32, m4, ta, ma
-; RV64-NEXT:    vnsrl.wi v24, v8, 0, v0.t
-; RV64-NEXT:    csrr t2, vlenb
-; RV64-NEXT:    li t3, 24
-; RV64-NEXT:    mul t2, t2, t3
-; RV64-NEXT:    add t2, sp, t2
-; RV64-NEXT:    addi t2, t2, 32
-; RV64-NEXT:    vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vnsrl.wi v24, v24, 0, v0.t
 ; RV64-NEXT:    vmv1r.v v0, v3
 ; RV64-NEXT:    csrr t2, vlenb
-; RV64-NEXT:    li t3, 56
+; RV64-NEXT:    li t3, 40
 ; RV64-NEXT:    mul t2, t2, t3
 ; RV64-NEXT:    add t2, sp, t2
 ; RV64-NEXT:    addi t2, t2, 32
-; RV64-NEXT:    vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vl8r.v v16, (t2) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vsetvli zero, a5, e32, m4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v24, 0, v0.t
+; RV64-NEXT:    vnsrl.wi v16, v16, 0, v0.t
 ; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 6
+; RV64-NEXT:    li t2, 56
+; RV64-NEXT:    mul a5, a5, t2
 ; RV64-NEXT:    add a5, sp, a5
 ; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vs8r.v v16, (a5) # vscale x 64-byte Folded Spill
 ; RV64-NEXT:    vmv1r.v v0, v6
 ; RV64-NEXT:    vsetvli zero, t1, e32, m4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v16, 0, v0.t
+; RV64-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    slli a5, a5, 3
 ; RV64-NEXT:    add a5, sp, a5
 ; RV64-NEXT:    addi a5, a5, 32
 ; RV64-NEXT:    vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
@@ -672,9 +640,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; RV64-NEXT:    addi t0, t0, -1
 ; RV64-NEXT:    and a5, t0, a5
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vle64.v v8, (a1)
-; RV64-NEXT:    addi a1, sp, 32
-; RV64-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vle64.v v16, (a1)
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; RV64-NEXT:    vslidedown.vi v30, v7, 2
 ; RV64-NEXT:    vmv1r.v v0, v4
@@ -683,130 +649,125 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; RV64-NEXT:    mul a1, a1, t0
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 32
-; RV64-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vsetvli zero, a6, e32, m4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v16, 0, v0.t
+; RV64-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    mul a1, a1, a6
+; RV64-NEXT:    slli a1, a1, 4
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 32
 ; RV64-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; RV64-NEXT:    vmv1r.v v0, v2
+; RV64-NEXT:    addi a1, sp, 32
+; RV64-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vsetvli zero, a5, e32, m4, ta, ma
+; RV64-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    li a5, 40
+; RV64-NEXT:    mul a1, a1, a5
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 32
-; RV64-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vsetvli zero, a5, e32, m4, ta, ma
-; RV64-NEXT:    vnsrl.wi v16, v8, 0, v0.t
+; RV64-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; RV64-NEXT:    bltu a4, a2, .LBB16_10
 ; RV64-NEXT:  # %bb.9:
 ; RV64-NEXT:    li a4, 16
 ; RV64-NEXT:  .LBB16_10:
 ; RV64-NEXT:    vmv1r.v v0, v5
-; RV64-NEXT:    addi a1, sp, 32
-; RV64-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vsetvli zero, a4, e32, m4, ta, ma
-; RV64-NEXT:    vnsrl.wi v24, v8, 0, v0.t
+; RV64-NEXT:    vnsrl.wi v16, v16, 0, v0.t
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a4, 48
 ; RV64-NEXT:    mul a1, a1, a4
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 32
-; RV64-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; RV64-NEXT:    mv a1, a7
 ; RV64-NEXT:    bltu a7, a3, .LBB16_12
 ; RV64-NEXT:  # %bb.11:
 ; RV64-NEXT:    li a1, 32
 ; RV64-NEXT:  .LBB16_12:
 ; RV64-NEXT:    vmv1r.v v0, v30
+; RV64-NEXT:    vmv4r.v v8, v24
 ; RV64-NEXT:    csrr a4, vlenb
-; RV64-NEXT:    li a5, 24
-; RV64-NEXT:    mul a4, a4, a5
+; RV64-NEXT:    slli a4, a4, 3
 ; RV64-NEXT:    add a4, sp, a4
 ; RV64-NEXT:    addi a4, a4, 32
 ; RV64-NEXT:    vl8r.v v24, (a4) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vmv4r.v v8, v24
 ; RV64-NEXT:    csrr a4, vlenb
-; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    li a5, 40
+; RV64-NEXT:    mul a4, a4, a5
 ; RV64-NEXT:    add a4, sp, a4
 ; RV64-NEXT:    addi a4, a4, 32
-; RV64-NEXT:    vl8r.v v24, (a4) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    csrr a4, vlenb
-; RV64-NEXT:    li a5, 24
+; RV64-NEXT:    li a5, 40
 ; RV64-NEXT:    mul a4, a4, a5
 ; RV64-NEXT:    add a4, sp, a4
 ; RV64-NEXT:    addi a4, a4, 32
 ; RV64-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; RV64-NEXT:    csrr a4, vlenb
-; RV64-NEXT:    slli a4, a4, 6
+; RV64-NEXT:    li a5, 56
+; RV64-NEXT:    mul a4, a4, a5
 ; RV64-NEXT:    add a4, sp, a4
 ; RV64-NEXT:    addi a4, a4, 32
 ; RV64-NEXT:    vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
 ; RV64-NEXT:    vslideup.vi v16, v8, 16
 ; RV64-NEXT:    csrr a4, vlenb
-; RV64-NEXT:    slli a4, a4, 6
+; RV64-NEXT:    li a5, 56
+; RV64-NEXT:    mul a4, a4, a5
 ; RV64-NEXT:    add a4, sp, a4
 ; RV64-NEXT:    addi a4, a4, 32
 ; RV64-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; RV64-NEXT:    addi a4, a1, -16
 ; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    mul a5, a5, a6
+; RV64-NEXT:    slli a5, a5, 4
 ; RV64-NEXT:    add a5, sp, a5
 ; RV64-NEXT:    addi a5, a5, 32
 ; RV64-NEXT:    vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vslideup.vi v16, v24, 16
 ; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    mul a5, a5, a6
-; RV64-NEXT:    add a5, sp, a5
-; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs8r.v v16, (a5) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    csrr a5, vlenb
 ; RV64-NEXT:    li a6, 48
 ; RV64-NEXT:    mul a5, a5, a6
 ; RV64-NEXT:    add a5, sp, a5
 ; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    csrr a5, vlenb
-; RV64-NEXT:    li a6, 24
+; RV64-NEXT:    li a6, 40
 ; RV64-NEXT:    mul a5, a5, a6
 ; RV64-NEXT:    add a5, sp, a5
 ; RV64-NEXT:    addi a5, a5, 32
 ; RV64-NEXT:    vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vslideup.vi v16, v8, 16
+; RV64-NEXT:    vslideup.vi v24, v8, 16
 ; RV64-NEXT:    csrr a5, vlenb
 ; RV64-NEXT:    li a6, 48
 ; RV64-NEXT:    mul a5, a5, a6
 ; RV64-NEXT:    add a5, sp, a5
 ; RV64-NEXT:    addi a5, a5, 32
-; RV64-NEXT:    vs8r.v v16, (a5) # vscale x 64-byte Folded Spill
+; RV64-NEXT:    vs8r.v v24, (a5) # vscale x 64-byte Folded Spill
 ; RV64-NEXT:    sltu a1, a1, a4
 ; RV64-NEXT:    addi a1, a1, -1
 ; RV64-NEXT:    and a1, a1, a4
 ; RV64-NEXT:    csrr a4, vlenb
-; RV64-NEXT:    slli a4, a4, 5
+; RV64-NEXT:    li a5, 24
+; RV64-NEXT:    mul a4, a4, a5
 ; RV64-NEXT:    add a4, sp, a4
 ; RV64-NEXT:    addi a4, a4, 32
-; RV64-NEXT:    vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vl8r.v v8, (a4) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v16, 0, v0.t
+; RV64-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; RV64-NEXT:    bltu a7, a2, .LBB16_14
 ; RV64-NEXT:  # %bb.13:
 ; RV64-NEXT:    li a7, 16
 ; RV64-NEXT:  .LBB16_14:
 ; RV64-NEXT:    vmv1r.v v0, v7
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a2, 40
-; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    slli a1, a1, 5
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 32
-; RV64-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; RV64-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vsetvli zero, a7, e32, m4, ta, ma
-; RV64-NEXT:    vnsrl.wi v24, v16, 0, v0.t
+; RV64-NEXT:    vnsrl.wi v24, v24, 0, v0.t
 ; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
 ; RV64-NEXT:    vslideup.vi v24, v8, 16
 ; RV64-NEXT:    vse32.v v24, (a0)
@@ -819,23 +780,17 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; RV64-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vse32.v v8, (a1)
 ; RV64-NEXT:    addi a1, a0, 128
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    li a3, 56
-; RV64-NEXT:    mul a2, a2, a3
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 32
-; RV64-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT:    vse32.v v8, (a1)
+; RV64-NEXT:    vse32.v v16, (a1)
 ; RV64-NEXT:    addi a0, a0, 384
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 6
+; RV64-NEXT:    li a2, 56
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 32
 ; RV64-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vse32.v v8, (a0)
 ; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    li a1, 72
-; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    slli a0, a0, 6
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    .cfi_def_cfa sp, 48
 ; RV64-NEXT:    ld s0, 40(sp) # 8-byte Folded Reload
@@ -852,27 +807,26 @@ declare <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64>, <32 x i1>, i32)
 define <32 x i32> @vtrunc_v32i32_v32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vtrunc_v32i32_v32i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    li a2, 16
-; CHECK-NEXT:    vslidedown.vi v12, v0, 2
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT:    vslidedown.vi v24, v0, 2
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    bltu a0, a2, .LBB17_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:  .LBB17_2:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v24, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    addi a1, a0, -16
 ; CHECK-NEXT:    sltu a0, a0, a1
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v24, v16, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v16, v16, 0, v0.t
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vslideup.vi v8, v24, 16
+; CHECK-NEXT:    vslideup.vi v8, v16, 16
 ; CHECK-NEXT:    ret
   %v = call <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64> %a, <32 x i1> %m, i32 %vl)
   ret <32 x i32> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll
index b72e3cfcb920a..bd3f3317f4647 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll
@@ -27,9 +27,9 @@ define <4 x half> @vuitofp_v4f16_v4i7(<4 x i7> %va, <4 x i1> %m, i32 zeroext %ev
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v9, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i7(<4 x i7> %va, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -49,9 +49,9 @@ define <4 x half> @vuitofp_v4f16_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %ev
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v9, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -69,9 +69,9 @@ define <4 x half> @vuitofp_v4f16_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -121,16 +121,15 @@ define <4 x half> @vuitofp_v4f16_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %
 ; ZVFH-LABEL: vuitofp_v4f16_v4i32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v9, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_v4f16_v4i32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.xu.v v9, v8, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -140,16 +139,15 @@ define <4 x half> @vuitofp_v4f16_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl)
 ; ZVFH-LABEL: vuitofp_v4f16_v4i32_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_v4f16_v4i32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.xu.v v9, v8
+; ZVFHMIN-NEXT:    vfcvt.f.xu.v v8, v8
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -161,17 +159,17 @@ define <4 x half> @vuitofp_v4f16_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %
 ; ZVFH-LABEL: vuitofp_v4f16_v4i64:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v10, v8, v0.t
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFH-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_v4f16_v4i64:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.xu.w v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.xu.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -181,17 +179,17 @@ define <4 x half> @vuitofp_v4f16_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl)
 ; ZVFH-LABEL: vuitofp_v4f16_v4i64_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v10, v8
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_v4f16_v4i64_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.xu.w v10, v8
+; ZVFHMIN-NEXT:    vfncvt.f.xu.w v8, v8
 ; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -273,8 +271,7 @@ define <4 x float> @vuitofp_v4f32_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vuitofp_v4f32_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x float> @llvm.vp.uitofp.v4f32.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
   ret <4 x float> %v
@@ -284,8 +281,7 @@ define <4 x float> @vuitofp_v4f32_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl
 ; CHECK-LABEL: vuitofp_v4f32_v4i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <4 x float> @llvm.vp.uitofp.v4f32.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x float> %v
@@ -297,8 +293,8 @@ define <4 x double> @vuitofp_v4f64_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %
 ; CHECK-LABEL: vuitofp_v4f64_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT:    vzext.vf4 v9, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -308,8 +304,8 @@ define <4 x double> @vuitofp_v4f64_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl)
 ; CHECK-LABEL: vuitofp_v4f64_v4i8_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf4 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x double> %v
@@ -321,8 +317,8 @@ define <4 x double> @vuitofp_v4f64_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vuitofp_v4f64_v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT:    vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -332,8 +328,8 @@ define <4 x double> @vuitofp_v4f64_v4i16_unmasked(<4 x i16> %va, i32 zeroext %ev
 ; CHECK-LABEL: vuitofp_v4f64_v4i16_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x double> %v
@@ -345,8 +341,8 @@ define <4 x double> @vuitofp_v4f64_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-LABEL: vuitofp_v4f64_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
   ret <4 x double> %v
@@ -356,8 +352,8 @@ define <4 x double> @vuitofp_v4f64_v4i32_unmasked(<4 x i32> %va, i32 zeroext %ev
 ; CHECK-LABEL: vuitofp_v4f64_v4i32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
index 8a8fe234cacd1..54cb91d1ced9e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
@@ -23,9 +23,9 @@ define <2 x half> @vfadd_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl)
   ret <2 x half> %v
@@ -44,9 +44,9 @@ define <2 x half> @vfadd_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, i32 ze
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x half> %v
@@ -68,9 +68,9 @@ define <2 x half> @vfadd_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zero
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <2 x half> poison, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -94,9 +94,9 @@ define <2 x half> @vfadd_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <2 x half> poison, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -119,9 +119,9 @@ define <3 x half> @vfadd_vv_v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <3 x half> @llvm.vp.fadd.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl)
   ret <3 x half> %v
@@ -142,9 +142,9 @@ define <4 x half> @vfadd_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -163,9 +163,9 @@ define <4 x half> @vfadd_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, i32 ze
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -187,9 +187,9 @@ define <4 x half> @vfadd_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zero
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <4 x half> poison, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -213,9 +213,9 @@ define <4 x half> @vfadd_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <4 x half> poison, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -238,9 +238,9 @@ define <8 x half> @vfadd_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl)
   ret <8 x half> %v
@@ -259,9 +259,9 @@ define <8 x half> @vfadd_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, i32 ze
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x half> %v
@@ -278,14 +278,14 @@ define <8 x half> @vfadd_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v10, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -304,14 +304,14 @@ define <8 x half> @vfadd_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -334,9 +334,9 @@ define <16 x half> @vfadd_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -355,9 +355,9 @@ define <16 x half> @vfadd_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x half> %v
@@ -374,14 +374,14 @@ define <16 x half> @vfadd_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v12, v8, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -400,14 +400,14 @@ define <16 x half> @vfadd_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
index 690c8af7fc8e7..a1e4695be2223 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
@@ -86,10 +86,9 @@ define <8 x i1> @isnan_v8f32(<8 x float> %x,  <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: isnan_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfclass.v v10, v8, v0.t
+; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 512
-; CHECK-NEXT:    vmseq.vx v8, v10, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f32(<8 x float> %x, i32 2, <8 x i1> %m, i32 %evl)
   ret <8 x i1> %1
@@ -111,10 +110,9 @@ define <16 x i1> @isnan_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: isnan_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfclass.v v12, v8, v0.t
+; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 256
-; CHECK-NEXT:    vmseq.vx v8, v12, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f32(<16 x float> %x, i32 1, <16 x i1> %m, i32 %evl)
   ret <16 x i1> %1
@@ -162,10 +160,9 @@ define <4 x i1> @isposinf_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: isposinf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfclass.v v10, v8, v0.t
+; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 128
-; CHECK-NEXT:    vmseq.vx v8, v10, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.vp.is.fpclass.v4f64(<4 x double> %x, i32 512, <4 x i1> %m, i32 %evl) ; 0x200 = "+inf"
   ret <4 x i1> %1
@@ -187,9 +184,8 @@ define <8 x i1> @isneginf_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-LABEL: isneginf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vfclass.v v12, v8, v0.t
-; CHECK-NEXT:    vmseq.vi v8, v12, 1, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfclass.v v8, v8, v0.t
+; CHECK-NEXT:    vmseq.vi v0, v8, 1, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f64(<8 x double> %x, i32 4, <8 x i1> %m, i32 %evl) ; "-inf"
   ret <8 x i1> %1
@@ -212,9 +208,8 @@ define <16 x i1> @isfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext %e
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 126
-; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 504, <16 x i1> %m, i32 %evl) ; 0x1f8 = "finite"
   ret <16 x i1> %1
@@ -239,9 +234,8 @@ define <16 x i1> @isposfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 112
-; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 448, <16 x i1> %m, i32 %evl) ; 0x1c0 = "+finite"
   ret <16 x i1> %1
@@ -265,9 +259,8 @@ define <16 x i1> @isnotfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vfclass.v v8, v8, v0.t
 ; CHECK-NEXT:    li a0, 897
-; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 519, <16 x i1> %m, i32 %evl) ; 0x207 = "inf|nan"
   ret <16 x i1> %1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll
index dfd509062ccf7..786f161415859 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll
@@ -2577,10 +2577,8 @@ define <16 x i1> @fcmp_ogt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2591,12 +2589,10 @@ define <16 x i1> @fcmp_ogt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2609,12 +2605,10 @@ define <16 x i1> @fcmp_ogt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2628,10 +2622,8 @@ define <16 x i1> @fcmp_oge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2642,12 +2634,10 @@ define <16 x i1> @fcmp_oge_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2660,12 +2650,10 @@ define <16 x i1> @fcmp_oge_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2679,10 +2667,8 @@ define <16 x i1> @fcmp_olt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2693,12 +2679,10 @@ define <16 x i1> @fcmp_olt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2711,12 +2695,10 @@ define <16 x i1> @fcmp_olt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2730,10 +2712,8 @@ define <16 x i1> @fcmp_ole_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2744,12 +2724,10 @@ define <16 x i1> @fcmp_ole_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2762,12 +2740,10 @@ define <16 x i1> @fcmp_ole_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2781,12 +2757,11 @@ define <16 x i1> @fcmp_one_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2798,13 +2773,12 @@ define <16 x i1> @fcmp_one_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v12, v13
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2818,13 +2792,12 @@ define <16 x i1> @fcmp_one_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v13, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2836,9 +2809,9 @@ define <16 x i1> @fcmp_ord_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_ord_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vv v10, v10, v10
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2849,9 +2822,9 @@ define <16 x i1> @fcmp_ord_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2864,9 +2837,9 @@ define <16 x i1> @fcmp_ord_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2880,12 +2853,11 @@ define <16 x i1> @fcmp_ueq_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2897,13 +2869,12 @@ define <16 x i1> @fcmp_ueq_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v12, v13
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2917,13 +2888,12 @@ define <16 x i1> @fcmp_ueq_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v13, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2937,10 +2907,9 @@ define <16 x i1> @fcmp_ugt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2951,12 +2920,11 @@ define <16 x i1> @fcmp_ugt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2969,12 +2937,11 @@ define <16 x i1> @fcmp_ugt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2988,10 +2955,9 @@ define <16 x i1> @fcmp_uge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -3002,12 +2968,11 @@ define <16 x i1> @fcmp_uge_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3020,12 +2985,11 @@ define <16 x i1> @fcmp_uge_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3039,10 +3003,9 @@ define <16 x i1> @fcmp_ult_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -3053,12 +3016,11 @@ define <16 x i1> @fcmp_ult_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3071,12 +3033,11 @@ define <16 x i1> @fcmp_ult_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3090,10 +3051,9 @@ define <16 x i1> @fcmp_ule_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -3104,12 +3064,11 @@ define <16 x i1> @fcmp_ule_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3122,12 +3081,11 @@ define <16 x i1> @fcmp_ule_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3173,9 +3131,9 @@ define <16 x i1> @fcmp_uno_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmp_uno_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v12, v10, v10
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vv v10, v10, v10
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -3186,9 +3144,9 @@ define <16 x i1> @fcmp_uno_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vf v10, v10, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3201,9 +3159,9 @@ define <16 x i1> @fcmp_uno_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v10
+; CHECK-NEXT:    vmfne.vf v10, v10, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -3256,10 +3214,8 @@ define <32 x i1> @fcmp_ogt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3271,12 +3227,10 @@ define <32 x i1> @fcmp_ogt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3290,12 +3244,10 @@ define <32 x i1> @fcmp_ogt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3310,10 +3262,8 @@ define <32 x i1> @fcmp_oge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3325,12 +3275,10 @@ define <32 x i1> @fcmp_oge_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3344,12 +3292,10 @@ define <32 x i1> @fcmp_oge_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3364,10 +3310,8 @@ define <32 x i1> @fcmp_olt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3379,12 +3323,10 @@ define <32 x i1> @fcmp_olt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3398,12 +3340,10 @@ define <32 x i1> @fcmp_olt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3418,10 +3358,8 @@ define <32 x i1> @fcmp_ole_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3433,12 +3371,10 @@ define <32 x i1> @fcmp_ole_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3452,12 +3388,10 @@ define <32 x i1> @fcmp_ole_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3472,12 +3406,11 @@ define <32 x i1> @fcmp_one_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3489,14 +3422,13 @@ define <32 x i1> @fcmp_one_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3510,14 +3442,13 @@ define <32 x i1> @fcmp_one_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3530,9 +3461,9 @@ define <32 x i1> @fcmp_ord_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v12, v12, v12
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3544,9 +3475,9 @@ define <32 x i1> @fcmp_ord_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3560,9 +3491,9 @@ define <32 x i1> @fcmp_ord_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3577,12 +3508,11 @@ define <32 x i1> @fcmp_ueq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3594,14 +3524,13 @@ define <32 x i1> @fcmp_ueq_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3615,14 +3544,13 @@ define <32 x i1> @fcmp_ueq_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3637,10 +3565,9 @@ define <32 x i1> @fcmp_ugt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3652,12 +3579,11 @@ define <32 x i1> @fcmp_ugt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3671,12 +3597,11 @@ define <32 x i1> @fcmp_ugt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3691,10 +3616,9 @@ define <32 x i1> @fcmp_uge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3706,12 +3630,11 @@ define <32 x i1> @fcmp_uge_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3725,12 +3648,11 @@ define <32 x i1> @fcmp_uge_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3745,10 +3667,9 @@ define <32 x i1> @fcmp_ult_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3760,12 +3681,11 @@ define <32 x i1> @fcmp_ult_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3779,12 +3699,11 @@ define <32 x i1> @fcmp_ult_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3799,10 +3718,9 @@ define <32 x i1> @fcmp_ule_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3814,12 +3732,11 @@ define <32 x i1> @fcmp_ule_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3833,12 +3750,11 @@ define <32 x i1> @fcmp_ule_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3888,9 +3804,9 @@ define <32 x i1> @fcmp_uno_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v12, v12, v12
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3902,9 +3818,9 @@ define <32 x i1> @fcmp_uno_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3918,9 +3834,9 @@ define <32 x i1> @fcmp_uno_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -5868,10 +5784,8 @@ define <8 x i1> @fcmp_ogt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5882,12 +5796,10 @@ define <8 x i1> @fcmp_ogt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5900,12 +5812,10 @@ define <8 x i1> @fcmp_ogt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5919,10 +5829,8 @@ define <8 x i1> @fcmp_oge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5933,12 +5841,10 @@ define <8 x i1> @fcmp_oge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5951,12 +5857,10 @@ define <8 x i1> @fcmp_oge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5970,10 +5874,8 @@ define <8 x i1> @fcmp_olt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5984,12 +5886,10 @@ define <8 x i1> @fcmp_olt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6002,12 +5902,10 @@ define <8 x i1> @fcmp_olt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6021,10 +5919,8 @@ define <8 x i1> @fcmp_ole_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6035,12 +5931,10 @@ define <8 x i1> @fcmp_ole_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6053,12 +5947,10 @@ define <8 x i1> @fcmp_ole_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6072,12 +5964,11 @@ define <8 x i1> @fcmp_one_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6089,15 +5980,14 @@ define <8 x i1> @fcmp_one_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v12, v13
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
-; CHECK-NEXT:    ret
-  %head = insertelement <8 x float> poison, float %b, i32 0
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %splat, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6109,13 +5999,12 @@ define <8 x i1> @fcmp_one_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v13, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6127,9 +6016,9 @@ define <8 x i1> @fcmp_ord_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_ord_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vv v10, v10, v10
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6140,9 +6029,9 @@ define <8 x i1> @fcmp_ord_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6155,9 +6044,9 @@ define <8 x i1> @fcmp_ord_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6171,12 +6060,11 @@ define <8 x i1> @fcmp_ueq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6188,13 +6076,12 @@ define <8 x i1> @fcmp_ueq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v12, v13
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6208,13 +6095,12 @@ define <8 x i1> @fcmp_ueq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v13, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6228,10 +6114,9 @@ define <8 x i1> @fcmp_ugt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6242,12 +6127,11 @@ define <8 x i1> @fcmp_ugt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6260,12 +6144,11 @@ define <8 x i1> @fcmp_ugt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6279,10 +6162,9 @@ define <8 x i1> @fcmp_uge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6293,12 +6175,11 @@ define <8 x i1> @fcmp_uge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6311,12 +6192,11 @@ define <8 x i1> @fcmp_uge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6330,10 +6210,9 @@ define <8 x i1> @fcmp_ult_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6344,12 +6223,11 @@ define <8 x i1> @fcmp_ult_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6362,12 +6240,11 @@ define <8 x i1> @fcmp_ult_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6381,10 +6258,9 @@ define <8 x i1> @fcmp_ule_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6395,12 +6271,11 @@ define <8 x i1> @fcmp_ule_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6413,12 +6288,11 @@ define <8 x i1> @fcmp_ule_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6464,9 +6338,9 @@ define <8 x i1> @fcmp_uno_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st
 ; CHECK-LABEL: fcmp_uno_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v12, v10, v10
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vv v10, v10, v10
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -6477,9 +6351,9 @@ define <8 x i1> @fcmp_uno_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vf v10, v10, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6492,9 +6366,9 @@ define <8 x i1> @fcmp_uno_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v10
+; CHECK-NEXT:    vmfne.vf v10, v10, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -6543,10 +6417,8 @@ define <16 x i1> @fcmp_ogt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6557,12 +6429,10 @@ define <16 x i1> @fcmp_ogt_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6575,12 +6445,10 @@ define <16 x i1> @fcmp_ogt_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6594,10 +6462,8 @@ define <16 x i1> @fcmp_oge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6608,12 +6474,10 @@ define <16 x i1> @fcmp_oge_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6626,12 +6490,10 @@ define <16 x i1> @fcmp_oge_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6645,10 +6507,8 @@ define <16 x i1> @fcmp_olt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6659,12 +6519,10 @@ define <16 x i1> @fcmp_olt_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6677,12 +6535,10 @@ define <16 x i1> @fcmp_olt_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6696,10 +6552,8 @@ define <16 x i1> @fcmp_ole_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6710,12 +6564,10 @@ define <16 x i1> @fcmp_ole_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6728,12 +6580,10 @@ define <16 x i1> @fcmp_ole_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6747,12 +6597,11 @@ define <16 x i1> @fcmp_one_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6763,14 +6612,13 @@ define <16 x i1> @fcmp_one_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6783,14 +6631,13 @@ define <16 x i1> @fcmp_one_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6802,9 +6649,9 @@ define <16 x i1> @fcmp_ord_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_ord_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v12, v12, v12
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6815,9 +6662,9 @@ define <16 x i1> @fcmp_ord_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6830,9 +6677,9 @@ define <16 x i1> @fcmp_ord_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6846,12 +6693,11 @@ define <16 x i1> @fcmp_ueq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6862,14 +6708,13 @@ define <16 x i1> @fcmp_ueq_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6882,14 +6727,13 @@ define <16 x i1> @fcmp_ueq_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6903,10 +6747,9 @@ define <16 x i1> @fcmp_ugt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6917,12 +6760,11 @@ define <16 x i1> @fcmp_ugt_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6935,12 +6777,11 @@ define <16 x i1> @fcmp_ugt_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6954,10 +6795,9 @@ define <16 x i1> @fcmp_uge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -6968,12 +6808,11 @@ define <16 x i1> @fcmp_uge_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6986,12 +6825,11 @@ define <16 x i1> @fcmp_uge_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -7005,10 +6843,9 @@ define <16 x i1> @fcmp_ult_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -7019,12 +6856,11 @@ define <16 x i1> @fcmp_ult_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -7037,12 +6873,11 @@ define <16 x i1> @fcmp_ult_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -7056,10 +6891,9 @@ define <16 x i1> @fcmp_ule_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -7070,12 +6904,11 @@ define <16 x i1> @fcmp_ule_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -7088,12 +6921,11 @@ define <16 x i1> @fcmp_ule_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -7139,9 +6971,9 @@ define <16 x i1> @fcmp_uno_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin
 ; CHECK-LABEL: fcmp_uno_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v12, v12, v12
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -7152,9 +6984,9 @@ define <16 x i1> @fcmp_uno_vf_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -7167,9 +6999,9 @@ define <16 x i1> @fcmp_uno_fv_v16f32(<16 x float> %va, float %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -8484,10 +8316,8 @@ define <4 x i1> @fcmp_ogt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8498,12 +8328,10 @@ define <4 x i1> @fcmp_ogt_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8516,12 +8344,10 @@ define <4 x i1> @fcmp_ogt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8535,10 +8361,8 @@ define <4 x i1> @fcmp_oge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8549,12 +8373,10 @@ define <4 x i1> @fcmp_oge_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8567,12 +8389,10 @@ define <4 x i1> @fcmp_oge_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8586,10 +8406,8 @@ define <4 x i1> @fcmp_olt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8600,12 +8418,10 @@ define <4 x i1> @fcmp_olt_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8618,12 +8434,10 @@ define <4 x i1> @fcmp_olt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8637,10 +8451,8 @@ define <4 x i1> @fcmp_ole_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8651,12 +8463,10 @@ define <4 x i1> @fcmp_ole_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8669,12 +8479,10 @@ define <4 x i1> @fcmp_ole_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8688,12 +8496,11 @@ define <4 x i1> @fcmp_one_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8705,13 +8512,12 @@ define <4 x i1> @fcmp_one_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v12, v13
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8725,13 +8531,12 @@ define <4 x i1> @fcmp_one_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v13, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8743,9 +8548,9 @@ define <4 x i1> @fcmp_ord_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ord_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vv v10, v10, v10
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8756,9 +8561,9 @@ define <4 x i1> @fcmp_ord_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8771,9 +8576,9 @@ define <4 x i1> @fcmp_ord_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8787,12 +8592,11 @@ define <4 x i1> @fcmp_ueq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8804,13 +8608,12 @@ define <4 x i1> @fcmp_ueq_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v12, v13
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8824,13 +8627,12 @@ define <4 x i1> @fcmp_ueq_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v13, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8844,10 +8646,9 @@ define <4 x i1> @fcmp_ugt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8858,12 +8659,11 @@ define <4 x i1> @fcmp_ugt_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8876,12 +8676,11 @@ define <4 x i1> @fcmp_ugt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8895,10 +8694,9 @@ define <4 x i1> @fcmp_uge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8909,12 +8707,11 @@ define <4 x i1> @fcmp_uge_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8927,12 +8724,11 @@ define <4 x i1> @fcmp_uge_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8946,10 +8742,9 @@ define <4 x i1> @fcmp_ult_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -8960,12 +8755,11 @@ define <4 x i1> @fcmp_ult_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8978,12 +8772,11 @@ define <4 x i1> @fcmp_ult_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -8997,10 +8790,9 @@ define <4 x i1> @fcmp_ule_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -9011,12 +8803,11 @@ define <4 x i1> @fcmp_ule_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -9029,12 +8820,11 @@ define <4 x i1> @fcmp_ule_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -9080,9 +8870,9 @@ define <4 x i1> @fcmp_uno_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_uno_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v12, v10, v10
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vv v10, v10, v10
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -9093,9 +8883,9 @@ define <4 x i1> @fcmp_uno_vf_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vf v10, v10, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -9108,9 +8898,9 @@ define <4 x i1> @fcmp_uno_fv_v4f64(<4 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v10
+; CHECK-NEXT:    vmfne.vf v10, v10, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -9159,10 +8949,8 @@ define <8 x i1> @fcmp_ogt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9173,12 +8961,10 @@ define <8 x i1> @fcmp_ogt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9191,12 +8977,10 @@ define <8 x i1> @fcmp_ogt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9210,10 +8994,8 @@ define <8 x i1> @fcmp_oge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9224,12 +9006,10 @@ define <8 x i1> @fcmp_oge_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9242,12 +9022,10 @@ define <8 x i1> @fcmp_oge_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9261,10 +9039,8 @@ define <8 x i1> @fcmp_olt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9275,12 +9051,10 @@ define <8 x i1> @fcmp_olt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9293,12 +9067,10 @@ define <8 x i1> @fcmp_olt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9312,10 +9084,8 @@ define <8 x i1> @fcmp_ole_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9326,12 +9096,10 @@ define <8 x i1> @fcmp_ole_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9344,12 +9112,10 @@ define <8 x i1> @fcmp_ole_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9363,12 +9129,11 @@ define <8 x i1> @fcmp_one_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9379,14 +9144,13 @@ define <8 x i1> @fcmp_one_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9399,14 +9163,13 @@ define <8 x i1> @fcmp_one_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9418,9 +9181,9 @@ define <8 x i1> @fcmp_ord_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_ord_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v12, v12, v12
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9431,9 +9194,9 @@ define <8 x i1> @fcmp_ord_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9446,9 +9209,9 @@ define <8 x i1> @fcmp_ord_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9462,12 +9225,11 @@ define <8 x i1> @fcmp_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9478,14 +9240,13 @@ define <8 x i1> @fcmp_ueq_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9498,14 +9259,13 @@ define <8 x i1> @fcmp_ueq_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9519,10 +9279,9 @@ define <8 x i1> @fcmp_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9533,12 +9292,11 @@ define <8 x i1> @fcmp_ugt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9551,12 +9309,11 @@ define <8 x i1> @fcmp_ugt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9570,10 +9327,9 @@ define <8 x i1> @fcmp_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9584,12 +9340,11 @@ define <8 x i1> @fcmp_uge_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9602,12 +9357,11 @@ define <8 x i1> @fcmp_uge_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9621,10 +9375,9 @@ define <8 x i1> @fcmp_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9635,12 +9388,11 @@ define <8 x i1> @fcmp_ult_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9653,12 +9405,11 @@ define <8 x i1> @fcmp_ult_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9672,10 +9423,9 @@ define <8 x i1> @fcmp_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9686,12 +9436,11 @@ define <8 x i1> @fcmp_ule_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9704,12 +9453,11 @@ define <8 x i1> @fcmp_ule_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9755,9 +9503,9 @@ define <8 x i1> @fcmp_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmp_uno_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v12, v12, v12
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -9768,9 +9516,9 @@ define <8 x i1> @fcmp_uno_vf_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -9783,9 +9531,9 @@ define <8 x i1> @fcmp_uno_fv_v8f64(<8 x double> %va, double %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll
index 472f2073667db..7260b8eb1af76 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll
@@ -2138,8 +2138,8 @@ define <16 x i1> @fcmps_oeq_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2150,8 +2150,8 @@ define <16 x i1> @fcmps_oeq_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2164,8 +2164,8 @@ define <16 x i1> @fcmps_oeq_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2314,8 +2314,8 @@ define <16 x i1> @fcmps_one_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2326,8 +2326,8 @@ define <16 x i1> @fcmps_one_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2340,8 +2340,8 @@ define <16 x i1> @fcmps_one_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2353,9 +2353,9 @@ define <16 x i1> @fcmps_ord_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_ord_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vv v10, v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2366,9 +2366,9 @@ define <16 x i1> @fcmps_ord_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2381,9 +2381,9 @@ define <16 x i1> @fcmps_ord_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2396,8 +2396,8 @@ define <16 x i1> @fcmps_ueq_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2408,8 +2408,8 @@ define <16 x i1> @fcmps_ueq_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2422,8 +2422,8 @@ define <16 x i1> @fcmps_ueq_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2435,8 +2435,8 @@ define <16 x i1> @fcmps_ugt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_ugt_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2446,8 +2446,8 @@ define <16 x i1> @fcmps_ugt_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ugt_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2459,8 +2459,8 @@ define <16 x i1> @fcmps_ugt_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ugt_fv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2472,8 +2472,8 @@ define <16 x i1> @fcmps_uge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_uge_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v8, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2483,8 +2483,8 @@ define <16 x i1> @fcmps_uge_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_uge_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2496,8 +2496,8 @@ define <16 x i1> @fcmps_uge_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_uge_fv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2509,8 +2509,8 @@ define <16 x i1> @fcmps_ult_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_ult_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v8, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2520,8 +2520,8 @@ define <16 x i1> @fcmps_ult_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ult_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2533,8 +2533,8 @@ define <16 x i1> @fcmps_ult_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ult_fv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2546,8 +2546,8 @@ define <16 x i1> @fcmps_ule_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_ule_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2557,8 +2557,8 @@ define <16 x i1> @fcmps_ule_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ule_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2570,8 +2570,8 @@ define <16 x i1> @fcmps_ule_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK-LABEL: fcmps_ule_fv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2584,8 +2584,8 @@ define <16 x i1> @fcmps_une_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2596,8 +2596,8 @@ define <16 x i1> @fcmps_une_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2610,8 +2610,8 @@ define <16 x i1> @fcmps_une_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2623,10 +2623,10 @@ define <16 x i1> @fcmps_uno_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind
 ; CHECK-LABEL: fcmps_uno_vv_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vv v10, v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -2637,10 +2637,10 @@ define <16 x i1> @fcmps_uno_vf_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v10, fa0
-; CHECK-NEXT:    vmnot.m v9, v12
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v10, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2653,10 +2653,10 @@ define <16 x i1> @fcmps_uno_fv_v16f16(<16 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmnot.m v10, v12
-; CHECK-NEXT:    vmfle.vv v11, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v10, v11
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmnot.m v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -2671,8 +2671,8 @@ define <32 x i1> @fcmps_oeq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -2684,8 +2684,8 @@ define <32 x i1> @fcmps_oeq_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2699,8 +2699,8 @@ define <32 x i1> @fcmps_oeq_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2862,8 +2862,8 @@ define <32 x i1> @fcmps_one_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -2875,8 +2875,8 @@ define <32 x i1> @fcmps_one_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2890,8 +2890,8 @@ define <32 x i1> @fcmps_one_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2904,9 +2904,9 @@ define <32 x i1> @fcmps_ord_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vv v12, v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -2918,9 +2918,9 @@ define <32 x i1> @fcmps_ord_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2934,9 +2934,9 @@ define <32 x i1> @fcmps_ord_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2950,8 +2950,8 @@ define <32 x i1> @fcmps_ueq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -2963,8 +2963,8 @@ define <32 x i1> @fcmps_ueq_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2978,8 +2978,8 @@ define <32 x i1> @fcmps_ueq_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -2992,8 +2992,8 @@ define <32 x i1> @fcmps_ugt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3004,8 +3004,8 @@ define <32 x i1> @fcmps_ugt_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3018,8 +3018,8 @@ define <32 x i1> @fcmps_ugt_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3032,8 +3032,8 @@ define <32 x i1> @fcmps_uge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3044,8 +3044,8 @@ define <32 x i1> @fcmps_uge_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3058,8 +3058,8 @@ define <32 x i1> @fcmps_uge_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3072,8 +3072,8 @@ define <32 x i1> @fcmps_ult_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3084,8 +3084,8 @@ define <32 x i1> @fcmps_ult_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3098,8 +3098,8 @@ define <32 x i1> @fcmps_ult_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3112,8 +3112,8 @@ define <32 x i1> @fcmps_ule_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3124,8 +3124,8 @@ define <32 x i1> @fcmps_ule_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3138,8 +3138,8 @@ define <32 x i1> @fcmps_ule_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3153,8 +3153,8 @@ define <32 x i1> @fcmps_une_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3166,8 +3166,8 @@ define <32 x i1> @fcmps_une_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3181,8 +3181,8 @@ define <32 x i1> @fcmps_une_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3195,10 +3195,10 @@ define <32 x i1> @fcmps_uno_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v12, v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <32 x i1> %1
@@ -3210,10 +3210,10 @@ define <32 x i1> @fcmps_uno_vf_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v12, fa0
-; CHECK-NEXT:    vmnot.m v9, v16
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v12, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -3227,10 +3227,10 @@ define <32 x i1> @fcmps_uno_fv_v32f16(<32 x half> %va, half %b) nounwind strictf
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v12, fa0
-; CHECK-NEXT:    vmnot.m v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v12, fa0
+; CHECK-NEXT:    vmnot.m v9, v9
+; CHECK-NEXT:    vmorn.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <32 x half> poison, half %b, i32 0
   %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer
@@ -4840,8 +4840,8 @@ define <8 x i1> @fcmps_oeq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -4852,8 +4852,8 @@ define <8 x i1> @fcmps_oeq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -4866,8 +4866,8 @@ define <8 x i1> @fcmps_oeq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5016,8 +5016,8 @@ define <8 x i1> @fcmps_one_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5028,8 +5028,8 @@ define <8 x i1> @fcmps_one_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5042,8 +5042,8 @@ define <8 x i1> @fcmps_one_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5055,9 +5055,9 @@ define <8 x i1> @fcmps_ord_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_ord_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vv v10, v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5068,9 +5068,9 @@ define <8 x i1> @fcmps_ord_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5083,9 +5083,9 @@ define <8 x i1> @fcmps_ord_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5098,8 +5098,8 @@ define <8 x i1> @fcmps_ueq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5110,8 +5110,8 @@ define <8 x i1> @fcmps_ueq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5124,8 +5124,8 @@ define <8 x i1> @fcmps_ueq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5137,8 +5137,8 @@ define <8 x i1> @fcmps_ugt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_ugt_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5148,8 +5148,8 @@ define <8 x i1> @fcmps_ugt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ugt_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5161,8 +5161,8 @@ define <8 x i1> @fcmps_ugt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ugt_fv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5174,8 +5174,8 @@ define <8 x i1> @fcmps_uge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_uge_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v8, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5185,8 +5185,8 @@ define <8 x i1> @fcmps_uge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_uge_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5198,8 +5198,8 @@ define <8 x i1> @fcmps_uge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_uge_fv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5211,8 +5211,8 @@ define <8 x i1> @fcmps_ult_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_ult_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v8, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5222,8 +5222,8 @@ define <8 x i1> @fcmps_ult_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ult_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5235,8 +5235,8 @@ define <8 x i1> @fcmps_ult_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ult_fv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5248,8 +5248,8 @@ define <8 x i1> @fcmps_ule_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_ule_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5259,8 +5259,8 @@ define <8 x i1> @fcmps_ule_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ule_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5272,8 +5272,8 @@ define <8 x i1> @fcmps_ule_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK-LABEL: fcmps_ule_fv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5286,8 +5286,8 @@ define <8 x i1> @fcmps_une_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5298,8 +5298,8 @@ define <8 x i1> @fcmps_une_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5312,8 +5312,8 @@ define <8 x i1> @fcmps_une_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5325,10 +5325,10 @@ define <8 x i1> @fcmps_uno_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s
 ; CHECK-LABEL: fcmps_uno_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vv v10, v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -5339,10 +5339,10 @@ define <8 x i1> @fcmps_uno_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v10, fa0
-; CHECK-NEXT:    vmnot.m v9, v12
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v10, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5355,10 +5355,10 @@ define <8 x i1> @fcmps_uno_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmnot.m v10, v12
-; CHECK-NEXT:    vmfle.vv v11, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v10, v11
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmnot.m v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x float> poison, float %b, i32 0
   %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
@@ -5372,8 +5372,8 @@ define <16 x i1> @fcmps_oeq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5384,8 +5384,8 @@ define <16 x i1> @fcmps_oeq_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5398,8 +5398,8 @@ define <16 x i1> @fcmps_oeq_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5548,8 +5548,8 @@ define <16 x i1> @fcmps_one_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5560,8 +5560,8 @@ define <16 x i1> @fcmps_one_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5574,8 +5574,8 @@ define <16 x i1> @fcmps_one_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5587,9 +5587,9 @@ define <16 x i1> @fcmps_ord_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_ord_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vv v12, v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5600,9 +5600,9 @@ define <16 x i1> @fcmps_ord_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5615,9 +5615,9 @@ define <16 x i1> @fcmps_ord_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5630,8 +5630,8 @@ define <16 x i1> @fcmps_ueq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5642,8 +5642,8 @@ define <16 x i1> @fcmps_ueq_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5656,8 +5656,8 @@ define <16 x i1> @fcmps_ueq_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5669,8 +5669,8 @@ define <16 x i1> @fcmps_ugt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_ugt_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5680,8 +5680,8 @@ define <16 x i1> @fcmps_ugt_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ugt_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5693,8 +5693,8 @@ define <16 x i1> @fcmps_ugt_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ugt_fv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5706,8 +5706,8 @@ define <16 x i1> @fcmps_uge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_uge_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5717,8 +5717,8 @@ define <16 x i1> @fcmps_uge_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_uge_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5730,8 +5730,8 @@ define <16 x i1> @fcmps_uge_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_uge_fv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5743,8 +5743,8 @@ define <16 x i1> @fcmps_ult_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_ult_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5754,8 +5754,8 @@ define <16 x i1> @fcmps_ult_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ult_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5767,8 +5767,8 @@ define <16 x i1> @fcmps_ult_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ult_fv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5780,8 +5780,8 @@ define <16 x i1> @fcmps_ule_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_ule_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5791,8 +5791,8 @@ define <16 x i1> @fcmps_ule_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ule_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5804,8 +5804,8 @@ define <16 x i1> @fcmps_ule_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK-LABEL: fcmps_ule_fv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5818,8 +5818,8 @@ define <16 x i1> @fcmps_une_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5830,8 +5830,8 @@ define <16 x i1> @fcmps_une_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5844,8 +5844,8 @@ define <16 x i1> @fcmps_une_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5857,10 +5857,10 @@ define <16 x i1> @fcmps_uno_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi
 ; CHECK-LABEL: fcmps_uno_vv_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v12, v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <16 x i1> %1
@@ -5871,10 +5871,10 @@ define <16 x i1> @fcmps_uno_vf_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v12, fa0
-; CHECK-NEXT:    vmnot.m v9, v16
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v12, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -5887,10 +5887,10 @@ define <16 x i1> @fcmps_uno_fv_v16f32(<16 x float> %va, float %b) nounwind stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmnot.m v12, v16
-; CHECK-NEXT:    vmfle.vv v13, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v12, v13
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmnot.m v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <16 x float> poison, float %b, i32 0
   %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
@@ -6968,8 +6968,8 @@ define <4 x i1> @fcmps_oeq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -6980,8 +6980,8 @@ define <4 x i1> @fcmps_oeq_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -6994,8 +6994,8 @@ define <4 x i1> @fcmps_oeq_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7144,8 +7144,8 @@ define <4 x i1> @fcmps_one_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7156,8 +7156,8 @@ define <4 x i1> @fcmps_one_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7170,8 +7170,8 @@ define <4 x i1> @fcmps_one_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7183,9 +7183,9 @@ define <4 x i1> @fcmps_ord_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ord_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vv v10, v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7196,9 +7196,9 @@ define <4 x i1> @fcmps_ord_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7211,9 +7211,9 @@ define <4 x i1> @fcmps_ord_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7226,8 +7226,8 @@ define <4 x i1> @fcmps_ueq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7238,8 +7238,8 @@ define <4 x i1> @fcmps_ueq_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7252,8 +7252,8 @@ define <4 x i1> @fcmps_ueq_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7265,8 +7265,8 @@ define <4 x i1> @fcmps_ugt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ugt_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7276,8 +7276,8 @@ define <4 x i1> @fcmps_ugt_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ugt_vf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7289,8 +7289,8 @@ define <4 x i1> @fcmps_ugt_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ugt_fv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7302,8 +7302,8 @@ define <4 x i1> @fcmps_uge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_uge_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v8, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7313,8 +7313,8 @@ define <4 x i1> @fcmps_uge_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_uge_vf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7326,8 +7326,8 @@ define <4 x i1> @fcmps_uge_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_uge_fv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7339,8 +7339,8 @@ define <4 x i1> @fcmps_ult_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ult_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v8, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7350,8 +7350,8 @@ define <4 x i1> @fcmps_ult_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ult_vf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7363,8 +7363,8 @@ define <4 x i1> @fcmps_ult_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ult_fv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7376,8 +7376,8 @@ define <4 x i1> @fcmps_ule_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ule_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7387,8 +7387,8 @@ define <4 x i1> @fcmps_ule_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ule_vf_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7400,8 +7400,8 @@ define <4 x i1> @fcmps_ule_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ule_fv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7414,8 +7414,8 @@ define <4 x i1> @fcmps_une_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7426,8 +7426,8 @@ define <4 x i1> @fcmps_une_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7440,8 +7440,8 @@ define <4 x i1> @fcmps_une_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7453,10 +7453,10 @@ define <4 x i1> @fcmps_uno_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_uno_vv_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vv v10, v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <4 x i1> %1
@@ -7467,10 +7467,10 @@ define <4 x i1> @fcmps_uno_vf_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v10, fa0
-; CHECK-NEXT:    vmnot.m v9, v12
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v10, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7483,10 +7483,10 @@ define <4 x i1> @fcmps_uno_fv_v4f64(<4 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmnot.m v10, v12
-; CHECK-NEXT:    vmfle.vv v11, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v10, v11
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmnot.m v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <4 x double> poison, double %b, i32 0
   %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer
@@ -7500,8 +7500,8 @@ define <8 x i1> @fcmps_oeq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7512,8 +7512,8 @@ define <8 x i1> @fcmps_oeq_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7526,8 +7526,8 @@ define <8 x i1> @fcmps_oeq_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7676,8 +7676,8 @@ define <8 x i1> @fcmps_one_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7688,8 +7688,8 @@ define <8 x i1> @fcmps_one_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7702,8 +7702,8 @@ define <8 x i1> @fcmps_one_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7715,9 +7715,9 @@ define <8 x i1> @fcmps_ord_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ord_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vv v12, v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7728,9 +7728,9 @@ define <8 x i1> @fcmps_ord_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7743,9 +7743,9 @@ define <8 x i1> @fcmps_ord_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7758,8 +7758,8 @@ define <8 x i1> @fcmps_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7770,8 +7770,8 @@ define <8 x i1> @fcmps_ueq_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7784,8 +7784,8 @@ define <8 x i1> @fcmps_ueq_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7797,8 +7797,8 @@ define <8 x i1> @fcmps_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ugt_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7808,8 +7808,8 @@ define <8 x i1> @fcmps_ugt_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ugt_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7821,8 +7821,8 @@ define <8 x i1> @fcmps_ugt_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ugt_fv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7834,8 +7834,8 @@ define <8 x i1> @fcmps_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_uge_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7845,8 +7845,8 @@ define <8 x i1> @fcmps_uge_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_uge_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7858,8 +7858,8 @@ define <8 x i1> @fcmps_uge_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_uge_fv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7871,8 +7871,8 @@ define <8 x i1> @fcmps_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ult_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7882,8 +7882,8 @@ define <8 x i1> @fcmps_ult_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ult_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7895,8 +7895,8 @@ define <8 x i1> @fcmps_ult_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ult_fv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7908,8 +7908,8 @@ define <8 x i1> @fcmps_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_ule_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7919,8 +7919,8 @@ define <8 x i1> @fcmps_ule_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ule_vf_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7932,8 +7932,8 @@ define <8 x i1> @fcmps_ule_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK-LABEL: fcmps_ule_fv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7946,8 +7946,8 @@ define <8 x i1> @fcmps_une_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7958,8 +7958,8 @@ define <8 x i1> @fcmps_une_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7972,8 +7972,8 @@ define <8 x i1> @fcmps_une_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -7985,10 +7985,10 @@ define <8 x i1> @fcmps_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind
 ; CHECK-LABEL: fcmps_uno_vv_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v12, v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <8 x i1> %1
@@ -7999,10 +7999,10 @@ define <8 x i1> @fcmps_uno_vf_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v12, fa0
-; CHECK-NEXT:    vmnot.m v9, v16
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v12, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
@@ -8015,10 +8015,10 @@ define <8 x i1> @fcmps_uno_fv_v8f64(<8 x double> %va, double %b) nounwind strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmnot.m v12, v16
-; CHECK-NEXT:    vmfle.vv v13, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v12, v13
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmnot.m v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x double> poison, double %b, i32 0
   %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
index 30f509436214a..fde81be84c344 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
@@ -23,9 +23,9 @@ define <2 x half> @vfdiv_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl)
   ret <2 x half> %v
@@ -44,9 +44,9 @@ define <2 x half> @vfdiv_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, i32 ze
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x half> %v
@@ -68,9 +68,9 @@ define <2 x half> @vfdiv_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zero
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <2 x half> poison, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -94,9 +94,9 @@ define <2 x half> @vfdiv_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <2 x half> poison, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -119,9 +119,9 @@ define <3 x half> @vfdiv_vv_v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <3 x half> @llvm.vp.fdiv.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl)
   ret <3 x half> %v
@@ -142,9 +142,9 @@ define <4 x half> @vfdiv_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -163,9 +163,9 @@ define <4 x half> @vfdiv_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, i32 ze
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -187,9 +187,9 @@ define <4 x half> @vfdiv_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zero
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <4 x half> poison, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -213,9 +213,9 @@ define <4 x half> @vfdiv_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <4 x half> poison, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -238,9 +238,9 @@ define <8 x half> @vfdiv_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl)
   ret <8 x half> %v
@@ -259,9 +259,9 @@ define <8 x half> @vfdiv_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, i32 ze
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x half> %v
@@ -278,14 +278,14 @@ define <8 x half> @vfdiv_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v10, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -304,14 +304,14 @@ define <8 x half> @vfdiv_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -334,9 +334,9 @@ define <16 x half> @vfdiv_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -355,9 +355,9 @@ define <16 x half> @vfdiv_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x half> %v
@@ -374,14 +374,14 @@ define <16 x half> @vfdiv_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v12, v8, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -400,14 +400,14 @@ define <16 x half> @vfdiv_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
index 6f9885d9529d5..4ecd7f83e804d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
@@ -23,11 +23,11 @@ define <2 x half> @vfma_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
   ret <2 x half> %v
@@ -45,11 +45,11 @@ define <2 x half> @vfma_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, <2 x ha
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x half> %v
@@ -71,11 +71,11 @@ define <2 x half> @vfma_vf_v2f16(<2 x half> %va, half %b, <2 x half> %vc, <2 x i
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <2 x half> poison, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -99,11 +99,11 @@ define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %v
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <2 x half> poison, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -126,11 +126,11 @@ define <4 x half> @vfma_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -148,11 +148,11 @@ define <4 x half> @vfma_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, <4 x ha
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -174,11 +174,11 @@ define <4 x half> @vfma_vf_v4f16(<4 x half> %va, half %b, <4 x half> %vc, <4 x i
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <4 x half> poison, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -202,11 +202,11 @@ define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %v
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <4 x half> poison, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -229,11 +229,11 @@ define <8 x half> @vfma_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl)
   ret <8 x half> %v
@@ -251,11 +251,11 @@ define <8 x half> @vfma_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, <8 x ha
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x half> %v
@@ -277,11 +277,11 @@ define <8 x half> @vfma_vf_v8f16(<8 x half> %va, half %b, <8 x half> %vc, <8 x i
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -305,11 +305,11 @@ define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %v
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -332,11 +332,11 @@ define <16 x half> @vfma_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x half>
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -354,11 +354,11 @@ define <16 x half> @vfma_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, <16
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v12, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x half> %v
@@ -380,11 +380,11 @@ define <16 x half> @vfma_vf_v16f16(<16 x half> %va, half %b, <16 x half> %vc, <1
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -408,11 +408,11 @@ define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
index c736973dd0706..daa7750d092a5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
@@ -23,9 +23,9 @@ define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.maxnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
   ret <2 x half> %v
@@ -44,9 +44,9 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.maxnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x half> %v
@@ -67,9 +67,9 @@ define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.maxnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -88,9 +88,9 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.maxnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -111,9 +111,9 @@ define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.maxnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl)
   ret <8 x half> %v
@@ -132,9 +132,9 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.maxnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x half> %v
@@ -155,9 +155,9 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.maxnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -176,9 +176,9 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.maxnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
index c37df892de442..02b20a2ba4965 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
@@ -23,9 +23,9 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %v
@@ -46,9 +46,9 @@ define <2 x half> @vfmax_v2f16_vf(<2 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmax.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <2 x half> poison, half %b, i32 0
   %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -71,9 +71,9 @@ define <2 x half> @vfmax_v2f16_fv(<2 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <2 x half> poison, half %b, i32 0
   %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -96,9 +96,9 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %a, <4 x half> %b)
   ret <4 x half> %v
@@ -119,9 +119,9 @@ define <4 x half> @vfmax_v4f16_vf(<4 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmax.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <4 x half> poison, half %b, i32 0
   %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -144,9 +144,9 @@ define <4 x half> @vfmax_v4f16_fv(<4 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <4 x half> poison, half %b, i32 0
   %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -169,9 +169,9 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %a, <8 x half> %b)
   ret <8 x half> %v
@@ -188,13 +188,13 @@ define <8 x half> @vfmax_v8f16_vf(<8 x half> %a, half %b) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfmax.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <8 x half> poison, half %b, i32 0
   %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -213,13 +213,13 @@ define <8 x half> @vfmax_v8f16_fv(<8 x half> %a, half %b) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v10, v8, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <8 x half> poison, half %b, i32 0
   %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -242,9 +242,9 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %a, <16 x half> %b)
   ret <16 x half> %v
@@ -261,13 +261,13 @@ define <16 x half> @vfmax_v16f16_vf(<16 x half> %a, half %b) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vmv.v.x v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfmax.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -286,13 +286,13 @@ define <16 x half> @vfmax_v16f16_fv(<16 x half> %a, half %b) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vmv.v.x v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v12
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
index c4a143de5cff1..3255cf88c8784 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
@@ -23,9 +23,9 @@ define <2 x half> @vfmin_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.minnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
   ret <2 x half> %v
@@ -44,9 +44,9 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.minnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x half> %v
@@ -67,9 +67,9 @@ define <4 x half> @vfmin_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.minnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -88,9 +88,9 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.minnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -111,9 +111,9 @@ define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.minnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl)
   ret <8 x half> %v
@@ -132,9 +132,9 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.minnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x half> %v
@@ -155,9 +155,9 @@ define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.minnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -176,9 +176,9 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.minnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
index 90afe36a36c0f..92a9fbe441753 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
@@ -23,9 +23,9 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %v
@@ -46,9 +46,9 @@ define <2 x half> @vfmin_v2f16_vf(<2 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmin.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <2 x half> poison, half %b, i32 0
   %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -71,9 +71,9 @@ define <2 x half> @vfmin_v2f16_fv(<2 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <2 x half> poison, half %b, i32 0
   %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -96,9 +96,9 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
   ret <4 x half> %v
@@ -119,9 +119,9 @@ define <4 x half> @vfmin_v4f16_vf(<4 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmin.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <4 x half> poison, half %b, i32 0
   %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -144,9 +144,9 @@ define <4 x half> @vfmin_v4f16_fv(<4 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <4 x half> poison, half %b, i32 0
   %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -169,9 +169,9 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.minnum.v8f16(<8 x half> %a, <8 x half> %b)
   ret <8 x half> %v
@@ -188,13 +188,13 @@ define <8 x half> @vfmin_v8f16_vf(<8 x half> %a, half %b) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfmin.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <8 x half> poison, half %b, i32 0
   %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -213,13 +213,13 @@ define <8 x half> @vfmin_v8f16_fv(<8 x half> %a, half %b) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v10, v8, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <8 x half> poison, half %b, i32 0
   %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -242,9 +242,9 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.minnum.v16f16(<16 x half> %a, <16 x half> %b)
   ret <16 x half> %v
@@ -261,13 +261,13 @@ define <16 x half> @vfmin_v16f16_vf(<16 x half> %a, half %b) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vmv.v.x v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfmin.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -286,13 +286,13 @@ define <16 x half> @vfmin_v16f16_fv(<16 x half> %a, half %b) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vmv.v.x v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v12, v8, v12
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
index 3c0819e549552..4eefe20a5406c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
@@ -23,9 +23,9 @@ define <2 x half> @vfmul_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl)
   ret <2 x half> %v
@@ -44,9 +44,9 @@ define <2 x half> @vfmul_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, i32 ze
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x half> %v
@@ -68,9 +68,9 @@ define <2 x half> @vfmul_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zero
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <2 x half> poison, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -94,9 +94,9 @@ define <2 x half> @vfmul_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <2 x half> poison, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -119,9 +119,9 @@ define <3 x half> @vfmul_vv_v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <3 x half> @llvm.vp.fmul.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl)
   ret <3 x half> %v
@@ -142,9 +142,9 @@ define <4 x half> @vfmul_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -163,9 +163,9 @@ define <4 x half> @vfmul_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, i32 ze
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -187,9 +187,9 @@ define <4 x half> @vfmul_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zero
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <4 x half> poison, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -213,9 +213,9 @@ define <4 x half> @vfmul_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <4 x half> poison, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -238,9 +238,9 @@ define <8 x half> @vfmul_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl)
   ret <8 x half> %v
@@ -259,9 +259,9 @@ define <8 x half> @vfmul_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, i32 ze
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x half> %v
@@ -278,14 +278,14 @@ define <8 x half> @vfmul_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v10, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -304,14 +304,14 @@ define <8 x half> @vfmul_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -334,9 +334,9 @@ define <16 x half> @vfmul_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -355,9 +355,9 @@ define <16 x half> @vfmul_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmul.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x half> %v
@@ -374,14 +374,14 @@ define <16 x half> @vfmul_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v12, v8, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -400,14 +400,14 @@ define <16 x half> @vfmul_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfpext-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfpext-constrained-sdnode.ll
index b8a6be40b3f32..0459dce3f4ae3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfpext-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfpext-constrained-sdnode.ll
@@ -46,9 +46,9 @@ define <4 x double> @vfpext_v4f16_v4f64(<4 x half> %va) strictfp {
 ; CHECK-LABEL: vfpext_v4f16_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f16(<4 x half> %va, metadata !"fpexcept.strict")
   ret <4 x double> %evec
@@ -59,8 +59,8 @@ define <8 x float> @vfpext_v8f16_v8f32(<8 x half> %va) strictfp {
 ; CHECK-LABEL: vfpext_v8f16_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
   ret <8 x float> %evec
@@ -71,9 +71,9 @@ define <8 x double> @vfpext_v8f16_v8f64(<8 x half> %va) strictfp {
 ; CHECK-LABEL: vfpext_v8f16_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
   ret <8 x double> %evec
@@ -96,8 +96,8 @@ define <4 x double> @vfpext_v4f32_v4f64(<4 x float> %va) strictfp {
 ; CHECK-LABEL: vfpext_v4f32_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float> %va, metadata !"fpexcept.strict")
   ret <4 x double> %evec
@@ -108,8 +108,8 @@ define <8 x double> @vfpext_v8f32_v8f64(<8 x float> %va) strictfp {
 ; CHECK-LABEL: vfpext_v8f32_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32(<8 x float> %va, metadata !"fpexcept.strict")
   ret <8 x double> %evec
@@ -157,9 +157,9 @@ define <4 x double> @vfpext_v4bf16_v4f64(<4 x bfloat> %va) strictfp {
 ; CHECK-LABEL: vfpext_v4bf16_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4bf16(<4 x bfloat> %va, metadata !"fpexcept.strict")
   ret <4 x double> %evec
@@ -170,8 +170,8 @@ define <8 x float> @vfpext_v8bf16_v8f32(<8 x bfloat> %va) strictfp {
 ; CHECK-LABEL: vfpext_v8bf16_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8bf16(<8 x bfloat> %va, metadata !"fpexcept.strict")
   ret <8 x float> %evec
@@ -182,9 +182,9 @@ define <8 x double> @vfpext_v8bf16_v8f64(<8 x bfloat> %va) strictfp {
 ; CHECK-LABEL: vfpext_v8bf16_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8bf16(<8 x bfloat> %va, metadata !"fpexcept.strict")
   ret <8 x double> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptoi-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptoi-constrained-sdnode.ll
index ac58a597a0812..6b0eb5bff31c7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptoi-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptoi-constrained-sdnode.ll
@@ -9,8 +9,8 @@ define <1 x i1> @vfptosi_v1f16_v1i1(<1 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v1f16_v1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <1 x i1> @llvm.experimental.constrained.fptosi.v1i1.v1f16(<1 x half> %va, metadata !"fpexcept.strict")
@@ -22,8 +22,8 @@ define <1 x i1> @vfptoui_v1f16_v1i1(<1 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v1f16_v1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <1 x i1> @llvm.experimental.constrained.fptoui.v1i1.v1f16(<1 x half> %va, metadata !"fpexcept.strict")
@@ -73,8 +73,7 @@ define <1 x i8> @vfptosi_v1f16_v1i8(<1 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v1f16_v1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <1 x i8> @llvm.experimental.constrained.fptosi.v1i8.v1f16(<1 x half> %va, metadata !"fpexcept.strict")
   ret <1 x i8> %evec
@@ -85,8 +84,7 @@ define <1 x i8> @vfptoui_v1f16_v1i8(<1 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v1f16_v1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <1 x i8> @llvm.experimental.constrained.fptoui.v1i8.v1f16(<1 x half> %va, metadata !"fpexcept.strict")
   ret <1 x i8> %evec
@@ -169,8 +167,8 @@ define <2 x i1> @vfptosi_v2f16_v2i1(<2 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v2f16_v2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f16(<2 x half> %va, metadata !"fpexcept.strict")
@@ -182,8 +180,8 @@ define <2 x i1> @vfptoui_v2f16_v2i1(<2 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v2f16_v2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f16(<2 x half> %va, metadata !"fpexcept.strict")
@@ -195,8 +193,7 @@ define <2 x i8> @vfptosi_v2f16_v2i8(<2 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v2f16_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half> %va, metadata !"fpexcept.strict")
   ret <2 x i8> %evec
@@ -207,8 +204,7 @@ define <2 x i8> @vfptoui_v2f16_v2i8(<2 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v2f16_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half> %va, metadata !"fpexcept.strict")
   ret <2 x i8> %evec
@@ -291,8 +287,8 @@ define <4 x i1> @vfptosi_v4f16_v4i1(<4 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v4f16_v4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f16(<4 x half> %va, metadata !"fpexcept.strict")
@@ -304,8 +300,8 @@ define <4 x i1> @vfptoui_v4f16_v4i1(<4 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v4f16_v4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f16(<4 x half> %va, metadata !"fpexcept.strict")
@@ -317,8 +313,7 @@ define <4 x i8> @vfptosi_v4f16_v4i8(<4 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v4f16_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half> %va, metadata !"fpexcept.strict")
   ret <4 x i8> %evec
@@ -329,8 +324,7 @@ define <4 x i8> @vfptoui_v4f16_v4i8(<4 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v4f16_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half> %va, metadata !"fpexcept.strict")
   ret <4 x i8> %evec
@@ -387,9 +381,9 @@ define <4 x i64> @vfptosi_v4f16_v4i64(<4 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v4f16_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f16(<4 x half> %va, metadata !"fpexcept.strict")
   ret <4 x i64> %evec
@@ -400,9 +394,9 @@ define <4 x i64> @vfptoui_v4f16_v4i64(<4 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v4f16_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f16(<4 x half> %va, metadata !"fpexcept.strict")
   ret <4 x i64> %evec
@@ -413,8 +407,8 @@ define <8 x i1> @vfptosi_v8f16_v8i1(<8 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v8f16_v8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
@@ -426,8 +420,8 @@ define <8 x i1> @vfptoui_v8f16_v8i1(<8 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v8f16_v8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
@@ -439,8 +433,7 @@ define <8 x i8> @vfptosi_v8f16_v8i8(<8 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v8f16_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
   ret <8 x i8> %evec
@@ -451,8 +444,7 @@ define <8 x i8> @vfptoui_v8f16_v8i8(<8 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v8f16_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
   ret <8 x i8> %evec
@@ -485,8 +477,8 @@ define <8 x i32> @vfptosi_v8f16_v8i32(<8 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v8f16_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
   ret <8 x i32> %evec
@@ -497,8 +489,8 @@ define <8 x i32> @vfptoui_v8f16_v8i32(<8 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v8f16_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
   ret <8 x i32> %evec
@@ -509,9 +501,9 @@ define <8 x i64> @vfptosi_v8f16_v8i64(<8 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v8f16_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
   ret <8 x i64> %evec
@@ -522,9 +514,9 @@ define <8 x i64> @vfptoui_v8f16_v8i64(<8 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v8f16_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f16(<8 x half> %va, metadata !"fpexcept.strict")
   ret <8 x i64> %evec
@@ -535,8 +527,8 @@ define <16 x i1> @vfptosi_v16f16_v16i1(<16 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v16f16_v16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f16(<16 x half> %va, metadata !"fpexcept.strict")
@@ -548,8 +540,8 @@ define <16 x i1> @vfptoui_v16f16_v16i1(<16 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v16f16_v16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f16(<16 x half> %va, metadata !"fpexcept.strict")
@@ -561,8 +553,7 @@ define <16 x i8> @vfptosi_v16f16_v16i8(<16 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v16f16_v16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f16(<16 x half> %va, metadata !"fpexcept.strict")
   ret <16 x i8> %evec
@@ -573,8 +564,7 @@ define <16 x i8> @vfptoui_v16f16_v16i8(<16 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v16f16_v16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f16(<16 x half> %va, metadata !"fpexcept.strict")
   ret <16 x i8> %evec
@@ -607,8 +597,8 @@ define <16 x i32> @vfptosi_v16f16_v16i32(<16 x half> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v16f16_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f16(<16 x half> %va, metadata !"fpexcept.strict")
   ret <16 x i32> %evec
@@ -619,8 +609,8 @@ define <16 x i32> @vfptoui_v16f16_v16i32(<16 x half> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v16f16_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f16(<16 x half> %va, metadata !"fpexcept.strict")
   ret <16 x i32> %evec
@@ -632,8 +622,8 @@ define <32 x i1> @vfptosi_v32f16_v32i1(<32 x half> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <32 x i1> @llvm.experimental.constrained.fptosi.v32i1.v32f16(<32 x half> %va, metadata !"fpexcept.strict")
@@ -646,8 +636,8 @@ define <32 x i1> @vfptoui_v32f16_v32i1(<32 x half> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <32 x i1> @llvm.experimental.constrained.fptoui.v32i1.v32f16(<32 x half> %va, metadata !"fpexcept.strict")
@@ -660,8 +650,7 @@ define <32 x i8> @vfptosi_v32f16_v32i8(<32 x half> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <32 x i8> @llvm.experimental.constrained.fptosi.v32i8.v32f16(<32 x half> %va, metadata !"fpexcept.strict")
   ret <32 x i8> %evec
@@ -673,8 +662,7 @@ define <32 x i8> @vfptoui_v32f16_v32i8(<32 x half> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <32 x i8> @llvm.experimental.constrained.fptoui.v32i8.v32f16(<32 x half> %va, metadata !"fpexcept.strict")
   ret <32 x i8> %evec
@@ -709,8 +697,8 @@ define <1 x i1> @vfptosi_v1f32_v1i1(<1 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v1f32_v1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <1 x i1> @llvm.experimental.constrained.fptosi.v1i1.v1f32(<1 x float> %va, metadata !"fpexcept.strict")
@@ -722,8 +710,8 @@ define <1 x i1> @vfptoui_v1f32_v1i1(<1 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v1f32_v1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <1 x i1> @llvm.experimental.constrained.fptoui.v1i1.v1f32(<1 x float> %va, metadata !"fpexcept.strict")
@@ -735,9 +723,9 @@ define <1 x i8> @vfptosi_v1f32_v1i8(<1 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v1f32_v1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <1 x i8> @llvm.experimental.constrained.fptosi.v1i8.v1f32(<1 x float> %va, metadata !"fpexcept.strict")
   ret <1 x i8> %evec
@@ -748,9 +736,9 @@ define <1 x i8> @vfptoui_v1f32_v1i8(<1 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v1f32_v1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <1 x i8> @llvm.experimental.constrained.fptoui.v1i8.v1f32(<1 x float> %va, metadata !"fpexcept.strict")
   ret <1 x i8> %evec
@@ -761,8 +749,7 @@ define <1 x i16> @vfptosi_v1f32_v1i16(<1 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v1f32_v1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <1 x i16> @llvm.experimental.constrained.fptosi.v1i16.v1f32(<1 x float> %va, metadata !"fpexcept.strict")
   ret <1 x i16> %evec
@@ -773,8 +760,7 @@ define <1 x i16> @vfptoui_v1f32_v1i16(<1 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v1f32_v1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <1 x i16> @llvm.experimental.constrained.fptoui.v1i16.v1f32(<1 x float> %va, metadata !"fpexcept.strict")
   ret <1 x i16> %evec
@@ -831,8 +817,8 @@ define <2 x i1> @vfptosi_v2f32_v2i1(<2 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v2f32_v2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f32(<2 x float> %va, metadata !"fpexcept.strict")
@@ -844,8 +830,8 @@ define <2 x i1> @vfptoui_v2f32_v2i1(<2 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v2f32_v2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f32(<2 x float> %va, metadata !"fpexcept.strict")
@@ -857,9 +843,9 @@ define <2 x i8> @vfptosi_v2f32_v2i8(<2 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v2f32_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f32(<2 x float> %va, metadata !"fpexcept.strict")
   ret <2 x i8> %evec
@@ -870,9 +856,9 @@ define <2 x i8> @vfptoui_v2f32_v2i8(<2 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v2f32_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f32(<2 x float> %va, metadata !"fpexcept.strict")
   ret <2 x i8> %evec
@@ -883,8 +869,7 @@ define <2 x i16> @vfptosi_v2f32_v2i16(<2 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v2f32_v2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %va, metadata !"fpexcept.strict")
   ret <2 x i16> %evec
@@ -895,8 +880,7 @@ define <2 x i16> @vfptoui_v2f32_v2i16(<2 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v2f32_v2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %va, metadata !"fpexcept.strict")
   ret <2 x i16> %evec
@@ -953,8 +937,8 @@ define <4 x i1> @vfptosi_v4f32_v4i1(<4 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v4f32_v4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f32(<4 x float> %va, metadata !"fpexcept.strict")
@@ -966,8 +950,8 @@ define <4 x i1> @vfptoui_v4f32_v4i1(<4 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v4f32_v4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f32(<4 x float> %va, metadata !"fpexcept.strict")
@@ -979,9 +963,9 @@ define <4 x i8> @vfptosi_v4f32_v4i8(<4 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v4f32_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float> %va, metadata !"fpexcept.strict")
   ret <4 x i8> %evec
@@ -992,9 +976,9 @@ define <4 x i8> @vfptoui_v4f32_v4i8(<4 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v4f32_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float> %va, metadata !"fpexcept.strict")
   ret <4 x i8> %evec
@@ -1005,8 +989,7 @@ define <4 x i16> @vfptosi_v4f32_v4i16(<4 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v4f32_v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f32(<4 x float> %va, metadata !"fpexcept.strict")
   ret <4 x i16> %evec
@@ -1017,8 +1000,7 @@ define <4 x i16> @vfptoui_v4f32_v4i16(<4 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v4f32_v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f32(<4 x float> %va, metadata !"fpexcept.strict")
   ret <4 x i16> %evec
@@ -1051,8 +1033,8 @@ define <4 x i64> @vfptosi_v4f32_v4i64(<4 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v4f32_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float> %va, metadata !"fpexcept.strict")
   ret <4 x i64> %evec
@@ -1063,8 +1045,8 @@ define <4 x i64> @vfptoui_v4f32_v4i64(<4 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v4f32_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float> %va, metadata !"fpexcept.strict")
   ret <4 x i64> %evec
@@ -1075,8 +1057,8 @@ define <8 x i1> @vfptosi_v8f32_v8i1(<8 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v8f32_v8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f32(<8 x float> %va, metadata !"fpexcept.strict")
@@ -1088,8 +1070,8 @@ define <8 x i1> @vfptoui_v8f32_v8i1(<8 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v8f32_v8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f32(<8 x float> %va, metadata !"fpexcept.strict")
@@ -1101,9 +1083,9 @@ define <8 x i8> @vfptosi_v8f32_v8i8(<8 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v8f32_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f32(<8 x float> %va, metadata !"fpexcept.strict")
   ret <8 x i8> %evec
@@ -1114,9 +1096,9 @@ define <8 x i8> @vfptoui_v8f32_v8i8(<8 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v8f32_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f32(<8 x float> %va, metadata !"fpexcept.strict")
   ret <8 x i8> %evec
@@ -1127,8 +1109,7 @@ define <8 x i16> @vfptosi_v8f32_v8i16(<8 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v8f32_v8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f32(<8 x float> %va, metadata !"fpexcept.strict")
   ret <8 x i16> %evec
@@ -1139,8 +1120,7 @@ define <8 x i16> @vfptoui_v8f32_v8i16(<8 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v8f32_v8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f32(<8 x float> %va, metadata !"fpexcept.strict")
   ret <8 x i16> %evec
@@ -1173,8 +1153,8 @@ define <8 x i64> @vfptosi_v8f32_v8i64(<8 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v8f32_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f32(<8 x float> %va, metadata !"fpexcept.strict")
   ret <8 x i64> %evec
@@ -1185,8 +1165,8 @@ define <8 x i64> @vfptoui_v8f32_v8i64(<8 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v8f32_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f32(<8 x float> %va, metadata !"fpexcept.strict")
   ret <8 x i64> %evec
@@ -1197,8 +1177,8 @@ define <16 x i1> @vfptosi_v16f32_v16i1(<16 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v16f32_v16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f32(<16 x float> %va, metadata !"fpexcept.strict")
@@ -1210,8 +1190,8 @@ define <16 x i1> @vfptoui_v16f32_v16i1(<16 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v16f32_v16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f32(<16 x float> %va, metadata !"fpexcept.strict")
@@ -1223,9 +1203,9 @@ define <16 x i8> @vfptosi_v16f32_v16i8(<16 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v16f32_v16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f32(<16 x float> %va, metadata !"fpexcept.strict")
   ret <16 x i8> %evec
@@ -1236,9 +1216,9 @@ define <16 x i8> @vfptoui_v16f32_v16i8(<16 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v16f32_v16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f32(<16 x float> %va, metadata !"fpexcept.strict")
   ret <16 x i8> %evec
@@ -1249,8 +1229,7 @@ define <16 x i16> @vfptosi_v16f32_v16i16(<16 x float> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v16f32_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f32(<16 x float> %va, metadata !"fpexcept.strict")
   ret <16 x i16> %evec
@@ -1261,8 +1240,7 @@ define <16 x i16> @vfptoui_v16f32_v16i16(<16 x float> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v16f32_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f32(<16 x float> %va, metadata !"fpexcept.strict")
   ret <16 x i16> %evec
@@ -1295,8 +1273,8 @@ define <1 x i1> @vfptosi_v1f64_v1i1(<1 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v1f64_v1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <1 x i1> @llvm.experimental.constrained.fptosi.v1i1.v1f64(<1 x double> %va, metadata !"fpexcept.strict")
@@ -1308,8 +1286,8 @@ define <1 x i1> @vfptoui_v1f64_v1i1(<1 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v1f64_v1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <1 x i1> @llvm.experimental.constrained.fptoui.v1i1.v1f64(<1 x double> %va, metadata !"fpexcept.strict")
@@ -1321,9 +1299,9 @@ define <1 x i8> @vfptosi_v1f64_v1i8(<1 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v1f64_v1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1336,9 +1314,9 @@ define <1 x i8> @vfptoui_v1f64_v1i8(<1 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v1f64_v1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1351,9 +1329,9 @@ define <1 x i16> @vfptosi_v1f64_v1i16(<1 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v1f64_v1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <1 x i16> @llvm.experimental.constrained.fptosi.v1i16.v1f64(<1 x double> %va, metadata !"fpexcept.strict")
   ret <1 x i16> %evec
@@ -1364,9 +1342,9 @@ define <1 x i16> @vfptoui_v1f64_v1i16(<1 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v1f64_v1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <1 x i16> @llvm.experimental.constrained.fptoui.v1i16.v1f64(<1 x double> %va, metadata !"fpexcept.strict")
   ret <1 x i16> %evec
@@ -1377,8 +1355,7 @@ define <1 x i32> @vfptosi_v1f64_v1i32(<1 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v1f64_v1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double> %va, metadata !"fpexcept.strict")
   ret <1 x i32> %evec
@@ -1389,8 +1366,7 @@ define <1 x i32> @vfptoui_v1f64_v1i32(<1 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v1f64_v1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double> %va, metadata !"fpexcept.strict")
   ret <1 x i32> %evec
@@ -1423,8 +1399,8 @@ define <2 x i1> @vfptosi_v2f64_v2i1(<2 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v2f64_v2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f64(<2 x double> %va, metadata !"fpexcept.strict")
@@ -1436,8 +1412,8 @@ define <2 x i1> @vfptoui_v2f64_v2i1(<2 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v2f64_v2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f64(<2 x double> %va, metadata !"fpexcept.strict")
@@ -1449,9 +1425,9 @@ define <2 x i8> @vfptosi_v2f64_v2i8(<2 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v2f64_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1464,9 +1440,9 @@ define <2 x i8> @vfptoui_v2f64_v2i8(<2 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v2f64_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1479,9 +1455,9 @@ define <2 x i16> @vfptosi_v2f64_v2i16(<2 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v2f64_v2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %va, metadata !"fpexcept.strict")
   ret <2 x i16> %evec
@@ -1492,9 +1468,9 @@ define <2 x i16> @vfptoui_v2f64_v2i16(<2 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v2f64_v2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %va, metadata !"fpexcept.strict")
   ret <2 x i16> %evec
@@ -1505,8 +1481,7 @@ define <2 x i32> @vfptosi_v2f64_v2i32(<2 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v2f64_v2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double> %va, metadata !"fpexcept.strict")
   ret <2 x i32> %evec
@@ -1517,8 +1492,7 @@ define <2 x i32> @vfptoui_v2f64_v2i32(<2 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v2f64_v2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double> %va, metadata !"fpexcept.strict")
   ret <2 x i32> %evec
@@ -1551,8 +1525,8 @@ define <4 x i1> @vfptosi_v4f64_v4i1(<4 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v4f64_v4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f64(<4 x double> %va, metadata !"fpexcept.strict")
@@ -1564,8 +1538,8 @@ define <4 x i1> @vfptoui_v4f64_v4i1(<4 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v4f64_v4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f64(<4 x double> %va, metadata !"fpexcept.strict")
@@ -1577,9 +1551,9 @@ define <4 x i8> @vfptosi_v4f64_v4i8(<4 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v4f64_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1592,9 +1566,9 @@ define <4 x i8> @vfptoui_v4f64_v4i8(<4 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v4f64_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1607,9 +1581,9 @@ define <4 x i16> @vfptosi_v4f64_v4i16(<4 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v4f64_v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f64(<4 x double> %va, metadata !"fpexcept.strict")
   ret <4 x i16> %evec
@@ -1620,9 +1594,9 @@ define <4 x i16> @vfptoui_v4f64_v4i16(<4 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v4f64_v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f64(<4 x double> %va, metadata !"fpexcept.strict")
   ret <4 x i16> %evec
@@ -1633,8 +1607,7 @@ define <4 x i32> @vfptosi_v4f64_v4i32(<4 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v4f64_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double> %va, metadata !"fpexcept.strict")
   ret <4 x i32> %evec
@@ -1645,8 +1618,7 @@ define <4 x i32> @vfptoui_v4f64_v4i32(<4 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v4f64_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double> %va, metadata !"fpexcept.strict")
   ret <4 x i32> %evec
@@ -1679,8 +1651,8 @@ define <8 x i1> @vfptosi_v8f64_v8i1(<8 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v8f64_v8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f64(<8 x double> %va, metadata !"fpexcept.strict")
@@ -1692,8 +1664,8 @@ define <8 x i1> @vfptoui_v8f64_v8i1(<8 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v8f64_v8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f64(<8 x double> %va, metadata !"fpexcept.strict")
@@ -1705,9 +1677,9 @@ define <8 x i8> @vfptosi_v8f64_v8i8(<8 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v8f64_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1720,9 +1692,9 @@ define <8 x i8> @vfptoui_v8f64_v8i8(<8 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v8f64_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1735,9 +1707,9 @@ define <8 x i16> @vfptosi_v8f64_v8i16(<8 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v8f64_v8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f64(<8 x double> %va, metadata !"fpexcept.strict")
   ret <8 x i16> %evec
@@ -1748,9 +1720,9 @@ define <8 x i16> @vfptoui_v8f64_v8i16(<8 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v8f64_v8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f64(<8 x double> %va, metadata !"fpexcept.strict")
   ret <8 x i16> %evec
@@ -1761,8 +1733,7 @@ define <8 x i32> @vfptosi_v8f64_v8i32(<8 x double> %va) strictfp {
 ; CHECK-LABEL: vfptosi_v8f64_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f64(<8 x double> %va, metadata !"fpexcept.strict")
   ret <8 x i32> %evec
@@ -1773,8 +1744,7 @@ define <8 x i32> @vfptoui_v8f64_v8i32(<8 x double> %va) strictfp {
 ; CHECK-LABEL: vfptoui_v8f64_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f64(<8 x double> %va, metadata !"fpexcept.strict")
   ret <8 x i32> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll
index 1f74691437ad2..39b8507c422cd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll
@@ -13,8 +13,7 @@ define <2 x float> @vfptrunc_v2f64_v2f32(<2 x double> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v2f64_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x float> %evec
@@ -25,9 +24,9 @@ define <2 x half> @vfptrunc_v2f64_v2f16(<2 x double> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v2f64_v2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x half> %evec
@@ -38,8 +37,7 @@ define <2 x half> @vfptrunc_v2f32_v2f16(<2 x float> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v2f32_v2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x half> %evec
@@ -50,8 +48,7 @@ define <4 x float> @vfptrunc_v4f64_v4f32(<4 x double> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v4f64_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x float> %evec
@@ -62,9 +59,9 @@ define <4 x half> @vfptrunc_v4f64_v4f16(<4 x double> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v4f64_v4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f64(<4 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x half> %evec
@@ -75,8 +72,7 @@ define <4 x half> @vfptrunc_v4f32_v4f16(<4 x float> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v4f32_v4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x half> %evec
@@ -87,8 +83,7 @@ define <8 x float> @vfptrunc_v8f64_v8f32(<8 x double> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v8f64_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(<8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x float> %evec
@@ -99,9 +94,9 @@ define <8 x half> @vfptrunc_v8f64_v8f16(<8 x double> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v8f64_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f64(<8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x half> %evec
@@ -112,8 +107,7 @@ define <8 x half> @vfptrunc_v8f32_v8f16(<8 x float> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v8f32_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f32(<8 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x half> %evec
@@ -124,9 +118,9 @@ define <2 x bfloat> @vfptrunc_v2f64_v2bf16(<2 x double> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v2f64_v2bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x bfloat> @llvm.experimental.constrained.fptrunc.v2bf16.v2f64(<2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x bfloat> %evec
@@ -137,8 +131,7 @@ define <2 x bfloat> @vfptrunc_v2f32_v2bf16(<2 x float> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v2f32_v2bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x bfloat> @llvm.experimental.constrained.fptrunc.v2bf16.v2f32(<2 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x bfloat> %evec
@@ -149,9 +142,9 @@ define <4 x bfloat> @vfptrunc_v4f64_v4bf16(<4 x double> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v4f64_v4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x bfloat> @llvm.experimental.constrained.fptrunc.v4bf16.v4f64(<4 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x bfloat> %evec
@@ -162,8 +155,7 @@ define <4 x bfloat> @vfptrunc_v4f32_v4bf16(<4 x float> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v4f32_v4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x bfloat> @llvm.experimental.constrained.fptrunc.v4bf16.v4f32(<4 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x bfloat> %evec
@@ -174,9 +166,9 @@ define <8 x bfloat> @vfptrunc_v8f64_v8bf16(<8 x double> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v8f64_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x bfloat> @llvm.experimental.constrained.fptrunc.v8bf16.v8f64(<8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x bfloat> %evec
@@ -187,8 +179,7 @@ define <8 x bfloat> @vfptrunc_v8f32_v8bf16(<8 x float> %va) strictfp {
 ; CHECK-LABEL: vfptrunc_v8f32_v8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x bfloat> @llvm.experimental.constrained.fptrunc.v8bf16.v8f32(<8 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x bfloat> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll
index 6244419de65b1..e11e4641bcb2c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll
@@ -22,9 +22,9 @@ define <2 x half> @vfsqrt_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
   ret <2 x half> %v
@@ -42,9 +42,9 @@ define <2 x half> @vfsqrt_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x half> %v
@@ -64,9 +64,9 @@ define <4 x half> @vfsqrt_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -84,9 +84,9 @@ define <4 x half> @vfsqrt_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -106,9 +106,9 @@ define <8 x half> @vfsqrt_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v10, v10, v0.t
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
   ret <8 x half> %v
@@ -126,9 +126,9 @@ define <8 x half> @vfsqrt_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v10, v10
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x half> %v
@@ -148,9 +148,9 @@ define <16 x half> @vfsqrt_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -168,9 +168,9 @@ define <16 x half> @vfsqrt_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl)
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v12, v12
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll
index 0f3a6de4f4a90..f464e8838152e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll
@@ -23,9 +23,9 @@ define <2 x half> @vfsub_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl)
   ret <2 x half> %v
@@ -44,9 +44,9 @@ define <2 x half> @vfsub_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, i32 ze
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x half> %v
@@ -68,9 +68,9 @@ define <2 x half> @vfsub_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zero
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <2 x half> poison, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -94,9 +94,9 @@ define <2 x half> @vfsub_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <2 x half> poison, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
@@ -119,9 +119,9 @@ define <3 x half> @vfsub_vv_v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <3 x half> @llvm.vp.fsub.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl)
   ret <3 x half> %v
@@ -142,9 +142,9 @@ define <4 x half> @vfsub_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -163,9 +163,9 @@ define <4 x half> @vfsub_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, i32 ze
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x half> %v
@@ -187,9 +187,9 @@ define <4 x half> @vfsub_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zero
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <4 x half> poison, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -213,9 +213,9 @@ define <4 x half> @vfsub_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <4 x half> poison, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
@@ -238,9 +238,9 @@ define <8 x half> @vfsub_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl)
   ret <8 x half> %v
@@ -259,9 +259,9 @@ define <8 x half> @vfsub_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, i32 ze
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x half> %v
@@ -278,14 +278,14 @@ define <8 x half> @vfsub_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v10, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -304,14 +304,14 @@ define <8 x half> @vfsub_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
@@ -334,9 +334,9 @@ define <16 x half> @vfsub_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -355,9 +355,9 @@ define <16 x half> @vfsub_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, i3
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfsub.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x half> %v
@@ -374,14 +374,14 @@ define <16 x half> @vfsub_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v12, v8, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
@@ -400,14 +400,14 @@ define <16 x half> @vfsub_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll
index ebb920f0ac42e..d538ecda59a63 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll
@@ -40,9 +40,9 @@ define <8 x float> @vfwadd_v8f16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwadd_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vle16.v v11, (a1)
-; CHECK-NEXT:    vfwadd.vv v8, v10, v11
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vfwadd.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -56,9 +56,9 @@ define <16 x float> @vfwadd_v16f16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwadd_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
-; CHECK-NEXT:    vfwadd.vv v8, v12, v14
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v12, (a1)
+; CHECK-NEXT:    vfwadd.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
@@ -73,9 +73,9 @@ define <32 x float> @vfwadd_v32f16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vle16.v v20, (a1)
-; CHECK-NEXT:    vfwadd.vv v8, v16, v20
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v16, (a1)
+; CHECK-NEXT:    vfwadd.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <32 x half>, ptr %x
   %b = load <32 x half>, ptr %y
@@ -144,9 +144,9 @@ define <4 x double> @vfwadd_v4f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwadd_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vle32.v v11, (a1)
-; CHECK-NEXT:    vfwadd.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vfwadd.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x float>, ptr %x
   %b = load <4 x float>, ptr %y
@@ -160,9 +160,9 @@ define <8 x double> @vfwadd_v8f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwadd_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vle32.v v14, (a1)
-; CHECK-NEXT:    vfwadd.vv v8, v12, v14
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vle32.v v12, (a1)
+; CHECK-NEXT:    vfwadd.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = load <8 x float>, ptr %y
@@ -176,9 +176,9 @@ define <16 x double> @vfwadd_v16f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwadd_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vle32.v v20, (a1)
-; CHECK-NEXT:    vfwadd.vv v8, v16, v20
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    vfwadd.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
   %b = load <16 x float>, ptr %y
@@ -261,8 +261,8 @@ define <8 x float> @vfwadd_vf_v8f16(ptr %x, half %y) {
 ; CHECK-LABEL: vfwadd_vf_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
 ; CHECK-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -277,8 +277,8 @@ define <16 x float> @vfwadd_vf_v16f16(ptr %x, half %y) {
 ; CHECK-LABEL: vfwadd_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
 ; CHECK-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = insertelement <16 x half> poison, half %y, i32 0
@@ -294,8 +294,8 @@ define <32 x float> @vfwadd_vf_v32f16(ptr %x, half %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vfwadd.vf v8, v16, fa0
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %a = load <32 x half>, ptr %x
   %b = insertelement <32 x half> poison, half %y, i32 0
@@ -326,8 +326,8 @@ define <4 x double> @vfwadd_vf_v4f32(ptr %x, float %y) {
 ; CHECK-LABEL: vfwadd_vf_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
 ; CHECK-NEXT:    ret
   %a = load <4 x float>, ptr %x
   %b = insertelement <4 x float> poison, float %y, i32 0
@@ -342,8 +342,8 @@ define <8 x double> @vfwadd_vf_v8f32(ptr %x, float %y) {
 ; CHECK-LABEL: vfwadd_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> poison, float %y, i32 0
@@ -358,8 +358,8 @@ define <16 x double> @vfwadd_vf_v16f32(ptr %x, float %y) {
 ; CHECK-LABEL: vfwadd_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vfwadd.vf v8, v16, fa0
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
   %b = insertelement <16 x float> poison, float %y, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmacc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmacc.ll
index a9e9b757f372e..c70489fd43912 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmacc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmacc.ll
@@ -1854,9 +1854,9 @@ define <8 x double> @vfwmacc_vv_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, <8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvt.f.f.v v14, v12
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v13
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwmacc.vv v8, v14, v16
+; CHECK-NEXT:    vfwmacc.vv v8, v14, v12
 ; CHECK-NEXT:    ret
   %vd = fpext <8 x half> %vb to <8 x double>
   %ve = fpext <8 x half> %vc to <8 x double>
@@ -1868,9 +1868,9 @@ define <8 x double> @vfwmacc_vf_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, ha
 ; CHECK-LABEL: vfwmacc_vf_v8f64_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.v.f v16, fa0
+; CHECK-NEXT:    vfmv.v.f v13, fa0
 ; CHECK-NEXT:    vfwcvt.f.f.v v14, v12
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwmacc.vv v8, v14, v12
 ; CHECK-NEXT:    ret
@@ -1887,9 +1887,9 @@ define <8 x double> @vfwnmacc_vv_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, <
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvt.f.f.v v14, v12
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v13
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwnmacc.vv v8, v14, v16
+; CHECK-NEXT:    vfwnmacc.vv v8, v14, v12
 ; CHECK-NEXT:    ret
   %vd = fpext <8 x half> %vb to <8 x double>
   %ve = fpext <8 x half> %vc to <8 x double>
@@ -1903,9 +1903,9 @@ define <8 x double> @vfwnmacc_vf_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, h
 ; CHECK-LABEL: vfwnmacc_vf_v8f64_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.v.f v16, fa0
+; CHECK-NEXT:    vfmv.v.f v13, fa0
 ; CHECK-NEXT:    vfwcvt.f.f.v v14, v12
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwnmacc.vv v8, v14, v12
 ; CHECK-NEXT:    ret
@@ -1923,9 +1923,9 @@ define <8 x double> @vfwnmacc_fv_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, h
 ; CHECK-LABEL: vfwnmacc_fv_v8f64_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.v.f v16, fa0
+; CHECK-NEXT:    vfmv.v.f v13, fa0
 ; CHECK-NEXT:    vfwcvt.f.f.v v14, v12
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwnmacc.vv v8, v14, v12
 ; CHECK-NEXT:    ret
@@ -1944,9 +1944,9 @@ define <8 x double> @vfwmsac_vv_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, <8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvt.f.f.v v14, v12
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v13
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwmsac.vv v8, v14, v16
+; CHECK-NEXT:    vfwmsac.vv v8, v14, v12
 ; CHECK-NEXT:    ret
   %vd = fpext <8 x half> %vb to <8 x double>
   %ve = fpext <8 x half> %vc to <8 x double>
@@ -1959,9 +1959,9 @@ define <8 x double> @vfwmsac_vf_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, ha
 ; CHECK-LABEL: vfwmsac_vf_v8f64_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.v.f v16, fa0
+; CHECK-NEXT:    vfmv.v.f v13, fa0
 ; CHECK-NEXT:    vfwcvt.f.f.v v14, v12
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwmsac.vv v8, v14, v12
 ; CHECK-NEXT:    ret
@@ -1979,9 +1979,9 @@ define <8 x double> @vfwnmsac_vv_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, <
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvt.f.f.v v14, v12
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v13
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwnmsac.vv v8, v14, v16
+; CHECK-NEXT:    vfwnmsac.vv v8, v14, v12
 ; CHECK-NEXT:    ret
   %vd = fpext <8 x half> %vb to <8 x double>
   %ve = fpext <8 x half> %vc to <8 x double>
@@ -1994,9 +1994,9 @@ define <8 x double> @vfwnmsac_vf_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, h
 ; CHECK-LABEL: vfwnmsac_vf_v8f64_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.v.f v16, fa0
+; CHECK-NEXT:    vfmv.v.f v13, fa0
 ; CHECK-NEXT:    vfwcvt.f.f.v v14, v12
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwnmsac.vv v8, v14, v12
 ; CHECK-NEXT:    ret
@@ -2013,9 +2013,9 @@ define <8 x double> @vfwnmsac_fv_v8f64_v8f16(<8 x double> %va, <8 x half> %vb, h
 ; CHECK-LABEL: vfwnmsac_fv_v8f64_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.v.f v16, fa0
+; CHECK-NEXT:    vfmv.v.f v13, fa0
 ; CHECK-NEXT:    vfwcvt.f.f.v v14, v12
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v13
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwnmsac.vv v8, v14, v12
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll
index 1639f21f243d8..afcb18cf1de4b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll
@@ -159,9 +159,9 @@ define <8 x float> @vfwmaccbf16_vv_v8f32(<8 x float> %a, <8 x bfloat> %b, <8 x b
 ; ZVFBFMIN:       # %bb.0:
 ; ZVFBFMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
-; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v14, v11
+; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFMIN-NEXT:    vfmacc.vv v8, v12, v14
+; ZVFBFMIN-NEXT:    vfmacc.vv v8, v12, v10
 ; ZVFBFMIN-NEXT:    ret
   %b.ext = fpext <8 x bfloat> %b to <8 x float>
   %c.ext = fpext <8 x bfloat> %c to <8 x float>
@@ -207,9 +207,9 @@ define <16 x float> @vfwmaccbf16_vv_v16f32(<16 x float> %a, <16 x bfloat> %b, <1
 ; ZVFBFMIN:       # %bb.0:
 ; ZVFBFMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v20, v14
+; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v12, v14
 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFMIN-NEXT:    vfmacc.vv v8, v16, v20
+; ZVFBFMIN-NEXT:    vfmacc.vv v8, v16, v12
 ; ZVFBFMIN-NEXT:    ret
   %b.ext = fpext <16 x bfloat> %b to <16 x float>
   %c.ext = fpext <16 x bfloat> %c to <16 x float>
@@ -257,9 +257,9 @@ define <32 x float> @vfwmaccbf32_vv_v32f32(<32 x float> %a, <32 x bfloat> %b, <3
 ; ZVFBFMIN-NEXT:    li a0, 32
 ; ZVFBFMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
-; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v0, v20
+; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v20
 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFMIN-NEXT:    vfmacc.vv v8, v24, v0
+; ZVFBFMIN-NEXT:    vfmacc.vv v8, v24, v16
 ; ZVFBFMIN-NEXT:    ret
   %b.ext = fpext <32 x bfloat> %b to <32 x float>
   %c.ext = fpext <32 x bfloat> %c to <32 x float>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll
index 47ac1c1a88df4..472507e4943df 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll
@@ -40,9 +40,9 @@ define <8 x float> @vfwmul_v8f16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwmul_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vle16.v v11, (a1)
-; CHECK-NEXT:    vfwmul.vv v8, v10, v11
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vfwmul.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -56,9 +56,9 @@ define <16 x float> @vfwmul_v16f16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwmul_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
-; CHECK-NEXT:    vfwmul.vv v8, v12, v14
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v12, (a1)
+; CHECK-NEXT:    vfwmul.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
@@ -73,9 +73,9 @@ define <32 x float> @vfwmul_v32f16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vle16.v v20, (a1)
-; CHECK-NEXT:    vfwmul.vv v8, v16, v20
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v16, (a1)
+; CHECK-NEXT:    vfwmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <32 x half>, ptr %x
   %b = load <32 x half>, ptr %y
@@ -144,9 +144,9 @@ define <4 x double> @vfwmul_v4f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwmul_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vle32.v v11, (a1)
-; CHECK-NEXT:    vfwmul.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vfwmul.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x float>, ptr %x
   %b = load <4 x float>, ptr %y
@@ -160,9 +160,9 @@ define <8 x double> @vfwmul_v8f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwmul_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vle32.v v14, (a1)
-; CHECK-NEXT:    vfwmul.vv v8, v12, v14
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vle32.v v12, (a1)
+; CHECK-NEXT:    vfwmul.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = load <8 x float>, ptr %y
@@ -176,9 +176,9 @@ define <16 x double> @vfwmul_v16f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwmul_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vle32.v v20, (a1)
-; CHECK-NEXT:    vfwmul.vv v8, v16, v20
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    vfwmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
   %b = load <16 x float>, ptr %y
@@ -261,8 +261,8 @@ define <8 x float> @vfwmul_vf_v8f16(ptr %x, half %y) {
 ; CHECK-LABEL: vfwmul_vf_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
 ; CHECK-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -277,8 +277,8 @@ define <16 x float> @vfwmul_vf_v16f16(ptr %x, half %y) {
 ; CHECK-LABEL: vfwmul_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
 ; CHECK-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = insertelement <16 x half> poison, half %y, i32 0
@@ -294,8 +294,8 @@ define <32 x float> @vfwmul_vf_v32f16(ptr %x, half %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vfwmul.vf v8, v16, fa0
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %a = load <32 x half>, ptr %x
   %b = insertelement <32 x half> poison, half %y, i32 0
@@ -326,8 +326,8 @@ define <4 x double> @vfwmul_vf_v4f32(ptr %x, float %y) {
 ; CHECK-LABEL: vfwmul_vf_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
 ; CHECK-NEXT:    ret
   %a = load <4 x float>, ptr %x
   %b = insertelement <4 x float> poison, float %y, i32 0
@@ -342,8 +342,8 @@ define <8 x double> @vfwmul_vf_v8f32(ptr %x, float %y) {
 ; CHECK-LABEL: vfwmul_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> poison, float %y, i32 0
@@ -358,8 +358,8 @@ define <16 x double> @vfwmul_vf_v16f32(ptr %x, float %y) {
 ; CHECK-LABEL: vfwmul_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vfwmul.vf v8, v16, fa0
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
   %b = insertelement <16 x float> poison, float %y, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll
index 25f6b5ab27411..622699714fa9e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll
@@ -40,9 +40,9 @@ define <8 x float> @vfwsub_v8f16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwsub_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vle16.v v11, (a1)
-; CHECK-NEXT:    vfwsub.vv v8, v10, v11
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vfwsub.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -56,9 +56,9 @@ define <16 x float> @vfwsub_v16f16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwsub_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
-; CHECK-NEXT:    vfwsub.vv v8, v12, v14
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v12, (a1)
+; CHECK-NEXT:    vfwsub.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
@@ -73,9 +73,9 @@ define <32 x float> @vfwsub_v32f16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vle16.v v20, (a1)
-; CHECK-NEXT:    vfwsub.vv v8, v16, v20
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v16, (a1)
+; CHECK-NEXT:    vfwsub.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <32 x half>, ptr %x
   %b = load <32 x half>, ptr %y
@@ -144,9 +144,9 @@ define <4 x double> @vfwsub_v4f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwsub_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vle32.v v11, (a1)
-; CHECK-NEXT:    vfwsub.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vfwsub.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x float>, ptr %x
   %b = load <4 x float>, ptr %y
@@ -160,9 +160,9 @@ define <8 x double> @vfwsub_v8f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwsub_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vle32.v v14, (a1)
-; CHECK-NEXT:    vfwsub.vv v8, v12, v14
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vle32.v v12, (a1)
+; CHECK-NEXT:    vfwsub.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = load <8 x float>, ptr %y
@@ -176,9 +176,9 @@ define <16 x double> @vfwsub_v16f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vfwsub_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vle32.v v20, (a1)
-; CHECK-NEXT:    vfwsub.vv v8, v16, v20
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    vfwsub.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
   %b = load <16 x float>, ptr %y
@@ -261,8 +261,8 @@ define <8 x float> @vfwsub_vf_v8f16(ptr %x, half %y) {
 ; CHECK-LABEL: vfwsub_vf_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0
 ; CHECK-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = insertelement <8 x half> poison, half %y, i32 0
@@ -277,8 +277,8 @@ define <16 x float> @vfwsub_vf_v16f16(ptr %x, half %y) {
 ; CHECK-LABEL: vfwsub_vf_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
 ; CHECK-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = insertelement <16 x half> poison, half %y, i32 0
@@ -294,8 +294,8 @@ define <32 x float> @vfwsub_vf_v32f16(ptr %x, half %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vfwsub.vf v8, v16, fa0
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %a = load <32 x half>, ptr %x
   %b = insertelement <32 x half> poison, half %y, i32 0
@@ -326,8 +326,8 @@ define <4 x double> @vfwsub_vf_v4f32(ptr %x, float %y) {
 ; CHECK-LABEL: vfwsub_vf_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0
 ; CHECK-NEXT:    ret
   %a = load <4 x float>, ptr %x
   %b = insertelement <4 x float> poison, float %y, i32 0
@@ -342,8 +342,8 @@ define <8 x double> @vfwsub_vf_v8f32(ptr %x, float %y) {
 ; CHECK-LABEL: vfwsub_vf_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
 ; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %b = insertelement <8 x float> poison, float %y, i32 0
@@ -358,8 +358,8 @@ define <16 x double> @vfwsub_vf_v16f32(ptr %x, float %y) {
 ; CHECK-LABEL: vfwsub_vf_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vfwsub.vf v8, v16, fa0
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %a = load <16 x float>, ptr %x
   %b = insertelement <16 x float> poison, float %y, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll
index ce5483e3ae8cc..e389ee251d8a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll
@@ -218,8 +218,8 @@ define <4 x double> @vsitofp_v4i1_v4f64(<4 x i1> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i1(<4 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x double> %evec
@@ -231,8 +231,8 @@ define <4 x double> @vuitofp_v4i1_v4f64(<4 x i1> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i1(<4 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x double> %evec
@@ -270,8 +270,8 @@ define <8 x float> @vsitofp_v8i1_v8f32(<8 x i1> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i1(<8 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x float> %evec
@@ -283,8 +283,8 @@ define <8 x float> @vuitofp_v8i1_v8f32(<8 x i1> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x float> %evec
@@ -296,8 +296,8 @@ define <8 x double> @vsitofp_v8i1_v8f64(<8 x i1> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i1(<8 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x double> %evec
@@ -309,8 +309,8 @@ define <8 x double> @vuitofp_v8i1_v8f64(<8 x i1> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i1(<8 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x double> %evec
@@ -322,8 +322,8 @@ define <16 x half> @vsitofp_v16i1_v16f16(<16 x i1> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i1(<16 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x half> %evec
@@ -335,8 +335,8 @@ define <16 x half> @vuitofp_v16i1_v16f16(<16 x i1> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i1(<16 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x half> %evec
@@ -348,8 +348,8 @@ define <16 x float> @vsitofp_v16i1_v16f32(<16 x i1> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i1(<16 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x float> %evec
@@ -361,8 +361,8 @@ define <16 x float> @vuitofp_v16i1_v16f32(<16 x i1> %va) strictfp {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i1(<16 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x float> %evec
@@ -375,8 +375,8 @@ define <32 x half> @vsitofp_v32i1_v32f16(<32 x i1> %va) strictfp {
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i1(<32 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <32 x half> %evec
@@ -389,8 +389,8 @@ define <32 x half> @vuitofp_v32i1_v32f16(<32 x i1> %va) strictfp {
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i1(<32 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <32 x half> %evec
@@ -629,8 +629,8 @@ define <4 x double> @vsitofp_v4i8_v4f64(<4 x i8> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v4i8_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf4 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i8(<4 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x double> %evec
@@ -641,8 +641,8 @@ define <4 x double> @vuitofp_v4i8_v4f64(<4 x i8> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v4i8_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf4 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i8(<4 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x double> %evec
@@ -677,8 +677,8 @@ define <8 x float> @vsitofp_v8i8_v8f32(<8 x i8> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v8i8_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i8(<8 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x float> %evec
@@ -689,8 +689,8 @@ define <8 x float> @vuitofp_v8i8_v8f32(<8 x i8> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v8i8_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i8(<8 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x float> %evec
@@ -701,8 +701,8 @@ define <8 x double> @vsitofp_v8i8_v8f64(<8 x i8> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v8i8_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i8(<8 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x double> %evec
@@ -713,8 +713,8 @@ define <8 x double> @vuitofp_v8i8_v8f64(<8 x i8> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v8i8_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i8(<8 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x double> %evec
@@ -725,8 +725,8 @@ define <16 x half> @vsitofp_v16i8_v16f16(<16 x i8> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v16i8_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i8(<16 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x half> %evec
@@ -737,8 +737,8 @@ define <16 x half> @vuitofp_v16i8_v16f16(<16 x i8> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v16i8_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i8(<16 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x half> %evec
@@ -749,8 +749,8 @@ define <16 x float> @vsitofp_v16i8_v16f32(<16 x i8> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v16i8_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i8(<16 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x float> %evec
@@ -761,8 +761,8 @@ define <16 x float> @vuitofp_v16i8_v16f32(<16 x i8> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v16i8_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i8(<16 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x float> %evec
@@ -773,10 +773,10 @@ define <32 x half> @vsitofp_v32i8_v32f16(<32 x i8> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v32i8_v32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i8(<32 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <32 x half> %evec
@@ -787,10 +787,10 @@ define <32 x half> @vuitofp_v32i8_v32f16(<32 x i8> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v32i8_v32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i8(<32 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <32 x half> %evec
@@ -987,8 +987,8 @@ define <4 x double> @vsitofp_v4i16_v4f64(<4 x i16> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v4i16_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i16(<4 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x double> %evec
@@ -999,8 +999,8 @@ define <4 x double> @vuitofp_v4i16_v4f64(<4 x i16> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v4i16_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i16(<4 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x double> %evec
@@ -1033,8 +1033,8 @@ define <8 x float> @vsitofp_v8i16_v8f32(<8 x i16> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v8i16_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i16(<8 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x float> %evec
@@ -1045,8 +1045,8 @@ define <8 x float> @vuitofp_v8i16_v8f32(<8 x i16> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v8i16_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i16(<8 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x float> %evec
@@ -1057,8 +1057,8 @@ define <8 x double> @vsitofp_v8i16_v8f64(<8 x i16> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v8i16_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i16(<8 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x double> %evec
@@ -1069,8 +1069,8 @@ define <8 x double> @vuitofp_v8i16_v8f64(<8 x i16> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v8i16_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i16(<8 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x double> %evec
@@ -1103,8 +1103,8 @@ define <16 x float> @vsitofp_v16i16_v16f32(<16 x i16> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v16i16_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i16(<16 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x float> %evec
@@ -1115,8 +1115,8 @@ define <16 x float> @vuitofp_v16i16_v16f32(<16 x i16> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v16i16_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i16(<16 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x float> %evec
@@ -1151,8 +1151,7 @@ define <1 x half> @vsitofp_v1i32_v1f16(<1 x i32> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v1i32_v1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i32(<1 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <1 x half> %evec
@@ -1163,8 +1162,7 @@ define <1 x half> @vuitofp_v1i32_v1f16(<1 x i32> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v1i32_v1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <1 x half> @llvm.experimental.constrained.uitofp.v1f16.v1i32(<1 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <1 x half> %evec
@@ -1221,8 +1219,7 @@ define <2 x half> @vsitofp_v2i32_v2f16(<2 x i32> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v2i32_v2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x half> @llvm.experimental.constrained.sitofp.v2f16.v2i32(<2 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x half> %evec
@@ -1233,8 +1230,7 @@ define <2 x half> @vuitofp_v2i32_v2f16(<2 x i32> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v2i32_v2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x half> @llvm.experimental.constrained.uitofp.v2f16.v2i32(<2 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x half> %evec
@@ -1291,8 +1287,7 @@ define <4 x half> @vsitofp_v4i32_v4f16(<4 x i32> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v4i32_v4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i32(<4 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x half> %evec
@@ -1303,8 +1298,7 @@ define <4 x half> @vuitofp_v4i32_v4f16(<4 x i32> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v4i32_v4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i32(<4 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x half> %evec
@@ -1337,8 +1331,8 @@ define <4 x double> @vsitofp_v4i32_v4f64(<4 x i32> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v4i32_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x double> %evec
@@ -1349,8 +1343,8 @@ define <4 x double> @vuitofp_v4i32_v4f64(<4 x i32> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v4i32_v4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x double> %evec
@@ -1361,8 +1355,7 @@ define <8 x half> @vsitofp_v8i32_v8f16(<8 x i32> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v8i32_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i32(<8 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x half> %evec
@@ -1373,8 +1366,7 @@ define <8 x half> @vuitofp_v8i32_v8f16(<8 x i32> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v8i32_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i32(<8 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x half> %evec
@@ -1407,8 +1399,8 @@ define <8 x double> @vsitofp_v8i32_v8f64(<8 x i32> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v8i32_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i32(<8 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x double> %evec
@@ -1419,8 +1411,8 @@ define <8 x double> @vuitofp_v8i32_v8f64(<8 x i32> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v8i32_v8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i32(<8 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x double> %evec
@@ -1431,8 +1423,7 @@ define <16 x half> @vsitofp_v16i32_v16f16(<16 x i32> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v16i32_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i32(<16 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x half> %evec
@@ -1443,8 +1434,7 @@ define <16 x half> @vuitofp_v16i32_v16f16(<16 x i32> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v16i32_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i32(<16 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x half> %evec
@@ -1477,9 +1467,9 @@ define <1 x half> @vsitofp_v1i64_v1f16(<1 x i64> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v1i64_v1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i64(<1 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <1 x half> %evec
@@ -1490,9 +1480,9 @@ define <1 x half> @vuitofp_v1i64_v1f16(<1 x i64> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v1i64_v1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <1 x half> @llvm.experimental.constrained.uitofp.v1f16.v1i64(<1 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <1 x half> %evec
@@ -1503,8 +1493,7 @@ define <1 x float> @vsitofp_v1i64_v1f32(<1 x i64> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v1i64_v1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <1 x float> %evec
@@ -1515,8 +1504,7 @@ define <1 x float> @vuitofp_v1i64_v1f32(<1 x i64> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v1i64_v1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <1 x float> %evec
@@ -1550,9 +1538,9 @@ define <2 x half> @vsitofp_v2i64_v2f16(<2 x i64> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v2i64_v2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x half> @llvm.experimental.constrained.sitofp.v2f16.v2i64(<2 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x half> %evec
@@ -1563,9 +1551,9 @@ define <2 x half> @vuitofp_v2i64_v2f16(<2 x i64> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v2i64_v2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x half> @llvm.experimental.constrained.uitofp.v2f16.v2i64(<2 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x half> %evec
@@ -1576,8 +1564,7 @@ define <2 x float> @vsitofp_v2i64_v2f32(<2 x i64> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v2i64_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x float> %evec
@@ -1588,8 +1575,7 @@ define <2 x float> @vuitofp_v2i64_v2f32(<2 x i64> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v2i64_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x float> %evec
@@ -1622,9 +1608,9 @@ define <4 x half> @vsitofp_v4i64_v4f16(<4 x i64> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v4i64_v4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i64(<4 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x half> %evec
@@ -1635,9 +1621,9 @@ define <4 x half> @vuitofp_v4i64_v4f16(<4 x i64> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v4i64_v4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i64(<4 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x half> %evec
@@ -1648,8 +1634,7 @@ define <4 x float> @vsitofp_v4i64_v4f32(<4 x i64> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v4i64_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x float> %evec
@@ -1660,8 +1645,7 @@ define <4 x float> @vuitofp_v4i64_v4f32(<4 x i64> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v4i64_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x float> %evec
@@ -1694,9 +1678,9 @@ define <8 x half> @vsitofp_v8i64_v8f16(<8 x i64> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v8i64_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i64(<8 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x half> %evec
@@ -1707,9 +1691,9 @@ define <8 x half> @vuitofp_v8i64_v8f16(<8 x i64> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v8i64_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i64(<8 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x half> %evec
@@ -1720,8 +1704,7 @@ define <8 x float> @vsitofp_v8i64_v8f32(<8 x i64> %va) strictfp {
 ; CHECK-LABEL: vsitofp_v8i64_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x float> %evec
@@ -1732,8 +1715,7 @@ define <8 x float> @vuitofp_v8i64_v8f32(<8 x i64> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v8i64_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x float> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
index 352666de57881..860319fe28583 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -14,15 +14,13 @@ define <2 x i8> @vpgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_v2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
   ret <2 x i8> %v
@@ -160,15 +158,13 @@ define <3 x i8> @vpgather_v3i8(<3 x ptr> %ptrs, <3 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_v3i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v3i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr> %ptrs, <3 x i1> %m, i32 %evl)
   ret <3 x i8> %v
@@ -178,15 +174,13 @@ define <3 x i8> @vpgather_truemask_v3i8(<3 x ptr> %ptrs, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_truemask_v3i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_v3i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr> %ptrs, <3 x i1> splat (i1 1), i32 %evl)
   ret <3 x i8> %v
@@ -198,15 +192,13 @@ define <4 x i8> @vpgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_v4i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v4i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
   ret <4 x i8> %v
@@ -216,15 +208,13 @@ define <4 x i8> @vpgather_truemask_v4i8(<4 x ptr> %ptrs, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_truemask_v4i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_v4i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
   ret <4 x i8> %v
@@ -236,15 +226,13 @@ define <8 x i8> @vpgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_v8i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v8i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
   ret <8 x i8> %v
@@ -323,15 +311,13 @@ define <2 x i16> @vpgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl)
 ; RV32-LABEL: vpgather_v2i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v2i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
   ret <2 x i16> %v
@@ -427,15 +413,13 @@ define <4 x i16> @vpgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl)
 ; RV32-LABEL: vpgather_v4i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v4i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
   ret <4 x i16> %v
@@ -445,15 +429,13 @@ define <4 x i16> @vpgather_truemask_v4i16(<4 x ptr> %ptrs, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_truemask_v4i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_v4i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
   ret <4 x i16> %v
@@ -465,15 +447,13 @@ define <8 x i16> @vpgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl)
 ; RV32-LABEL: vpgather_v8i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v8i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
   ret <8 x i16> %v
@@ -484,18 +464,18 @@ define <8 x i16> @vpgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vsext.vf4 v10, v8
-; RV32-NEXT:    vadd.vv v10, v10, v10
+; RV32-NEXT:    vadd.vv v8, v10, v10
 ; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_v8i8_v8i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf8 v12, v8
-; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vadd.vv v8, v12, v12
 ; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
   %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
@@ -507,18 +487,18 @@ define <8 x i16> @vpgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vsext.vf4 v10, v8
-; RV32-NEXT:    vadd.vv v10, v10, v10
+; RV32-NEXT:    vadd.vv v8, v10, v10
 ; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf8 v12, v8
-; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vadd.vv v8, v12, v12
 ; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i8> %idxs to <8 x i16>
   %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
@@ -561,9 +541,9 @@ define <8 x i16> @vpgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf4 v12, v8
-; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vadd.vv v8, v12, v12
 ; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
   %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
@@ -582,8 +562,7 @@ define <2 x i32> @vpgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl)
 ; RV64-LABEL: vpgather_v2i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
   ret <2 x i32> %v
@@ -643,8 +622,7 @@ define <4 x i32> @vpgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl)
 ; RV64-LABEL: vpgather_v4i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
   ret <4 x i32> %v
@@ -660,8 +638,7 @@ define <4 x i32> @vpgather_truemask_v4i32(<4 x ptr> %ptrs, i32 zeroext %evl) {
 ; RV64-LABEL: vpgather_truemask_v4i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8
-; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
   ret <4 x i32> %v
@@ -679,8 +656,7 @@ define <8 x i32> @vpgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl)
 ; RV64-LABEL: vpgather_v8i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
   ret <8 x i32> %v
@@ -700,9 +676,9 @@ define <8 x i32> @vpgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf8 v12, v8
-; RV64-NEXT:    vsll.vi v12, v12, 2
+; RV64-NEXT:    vsll.vi v8, v12, 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
   %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
@@ -723,9 +699,9 @@ define <8 x i32> @vpgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf8 v12, v8
-; RV64-NEXT:    vsll.vi v12, v12, 2
+; RV64-NEXT:    vsll.vi v8, v12, 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i8> %idxs to <8 x i32>
   %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
@@ -738,18 +714,18 @@ define <8 x i32> @vpgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 4
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vwmulu.vx v10, v8, a2
+; RV32-NEXT:    vwmulu.vx v9, v8, a2
 ; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
+; RV32-NEXT:    vluxei16.v v8, (a0), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 4
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vwmulu.vx v10, v8, a2
+; RV64-NEXT:    vwmulu.vx v9, v8, a2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
+; RV64-NEXT:    vluxei16.v v8, (a0), v9, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i8> %idxs to <8 x i32>
   %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
@@ -771,9 +747,9 @@ define <8 x i32> @vpgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf4 v12, v8
-; RV64-NEXT:    vsll.vi v12, v12, 2
+; RV64-NEXT:    vsll.vi v8, v12, 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
   %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
@@ -794,9 +770,9 @@ define <8 x i32> @vpgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs,
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf4 v12, v8
-; RV64-NEXT:    vsll.vi v12, v12, 2
+; RV64-NEXT:    vsll.vi v8, v12, 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i16> %idxs to <8 x i32>
   %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
@@ -875,8 +851,8 @@ define <4 x i64> @vpgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl)
 ; RV32-LABEL: vpgather_v4i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmv1r.v v10, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v10, v0.t
+; RV32-NEXT:    vmv1r.v v9, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v4i64:
@@ -892,8 +868,8 @@ define <4 x i64> @vpgather_truemask_v4i64(<4 x ptr> %ptrs, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_truemask_v4i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmv1r.v v10, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v10
+; RV32-NEXT:    vmv1r.v v9, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_v4i64:
@@ -911,8 +887,8 @@ define <8 x i64> @vpgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl)
 ; RV32-LABEL: vpgather_v8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v12, v0.t
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v8i64:
@@ -929,9 +905,9 @@ define <8 x i64> @vpgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vsext.vf4 v10, v8
-; RV32-NEXT:    vsll.vi v12, v10, 3
+; RV32-NEXT:    vsll.vi v10, v10, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_v8i8_v8i64:
@@ -952,9 +928,9 @@ define <8 x i64> @vpgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vsext.vf4 v10, v8
-; RV32-NEXT:    vsll.vi v12, v10, 3
+; RV32-NEXT:    vsll.vi v10, v10, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i64:
@@ -976,18 +952,18 @@ define <8 x i64> @vpgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vwmulu.vx v12, v8, a2
+; RV32-NEXT:    vwmulu.vx v11, v8, a2
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei16.v v8, (a0), v11, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 8
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vwmulu.vx v12, v8, a2
+; RV64-NEXT:    vwmulu.vx v11, v8, a2
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei16.v v8, (a0), v11, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i8> %idxs to <8 x i64>
   %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
@@ -1000,9 +976,9 @@ define <8 x i64> @vpgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vwmulsu.vx v12, v8, a2
+; RV32-NEXT:    vwmulsu.vx v10, v8, a2
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_v8i16_v8i64:
@@ -1023,9 +999,9 @@ define <8 x i64> @vpgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs,
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vwmulsu.vx v12, v8, a2
+; RV32-NEXT:    vwmulsu.vx v10, v8, a2
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8i64:
@@ -1047,18 +1023,18 @@ define <8 x i64> @vpgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs,
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vwmulu.vx v12, v8, a2
+; RV32-NEXT:    vwmulu.vx v10, v8, a2
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 8
 ; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vwmulu.vx v12, v8, a2
+; RV64-NEXT:    vwmulu.vx v10, v8, a2
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i16> %idxs to <8 x i64>
   %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
@@ -1070,9 +1046,9 @@ define <8 x i64> @vpgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x
 ; RV32-LABEL: vpgather_baseidx_v8i32_v8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vsll.vi v12, v8, 3
+; RV32-NEXT:    vsll.vi v10, v8, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_v8i32_v8i64:
@@ -1092,9 +1068,9 @@ define <8 x i64> @vpgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs,
 ; RV32-LABEL: vpgather_baseidx_sext_v8i32_v8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vsll.vi v12, v8, 3
+; RV32-NEXT:    vsll.vi v10, v8, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_v8i32_v8i64:
@@ -1115,9 +1091,9 @@ define <8 x i64> @vpgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs,
 ; RV32-LABEL: vpgather_baseidx_zext_v8i32_v8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vsll.vi v12, v8, 3
+; RV32-NEXT:    vsll.vi v10, v8, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_v8i32_v8i64:
@@ -1138,10 +1114,10 @@ define <8 x i64> @vpgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m
 ; RV32-LABEL: vpgather_baseidx_v8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
-; RV32-NEXT:    vsll.vi v12, v12, 3
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsll.vi v10, v8, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_v8i64:
@@ -1162,15 +1138,13 @@ define <2 x bfloat> @vpgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %
 ; RV32-LABEL: vpgather_v2bf16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v2bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <2 x bfloat> @llvm.vp.gather.v2bf16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
   ret <2 x bfloat> %v
@@ -1182,15 +1156,13 @@ define <4 x bfloat> @vpgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %
 ; RV32-LABEL: vpgather_v4bf16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v4bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
   ret <4 x bfloat> %v
@@ -1200,15 +1172,13 @@ define <4 x bfloat> @vpgather_truemask_v4bf16(<4 x ptr> %ptrs, i32 zeroext %evl)
 ; RV32-LABEL: vpgather_truemask_v4bf16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_v4bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
   ret <4 x bfloat> %v
@@ -1220,15 +1190,13 @@ define <8 x bfloat> @vpgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %
 ; RV32-LABEL: vpgather_v8bf16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v8bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
   ret <8 x bfloat> %v
@@ -1239,18 +1207,18 @@ define <8 x bfloat> @vpgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vsext.vf4 v10, v8
-; RV32-NEXT:    vadd.vv v10, v10, v10
+; RV32-NEXT:    vadd.vv v8, v10, v10
 ; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_v8i8_v8bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf8 v12, v8
-; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vadd.vv v8, v12, v12
 ; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
   %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
@@ -1262,18 +1230,18 @@ define <8 x bfloat> @vpgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vsext.vf4 v10, v8
-; RV32-NEXT:    vadd.vv v10, v10, v10
+; RV32-NEXT:    vadd.vv v8, v10, v10
 ; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf8 v12, v8
-; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vadd.vv v8, v12, v12
 ; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i8> %idxs to <8 x i16>
   %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
@@ -1316,9 +1284,9 @@ define <8 x bfloat> @vpgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf4 v12, v8
-; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vadd.vv v8, v12, v12
 ; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
   %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
@@ -1331,15 +1299,13 @@ define <2 x half> @vpgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl
 ; RV32-LABEL: vpgather_v2f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v2f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <2 x half> @llvm.vp.gather.v2f16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
   ret <2 x half> %v
@@ -1351,15 +1317,13 @@ define <4 x half> @vpgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl
 ; RV32-LABEL: vpgather_v4f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v4f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -1369,15 +1333,13 @@ define <4 x half> @vpgather_truemask_v4f16(<4 x ptr> %ptrs, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_truemask_v4f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_v4f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
   ret <4 x half> %v
@@ -1389,15 +1351,13 @@ define <8 x half> @vpgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl
 ; RV32-LABEL: vpgather_v8f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v8f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
   ret <8 x half> %v
@@ -1408,18 +1368,18 @@ define <8 x half> @vpgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vsext.vf4 v10, v8
-; RV32-NEXT:    vadd.vv v10, v10, v10
+; RV32-NEXT:    vadd.vv v8, v10, v10
 ; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_v8i8_v8f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf8 v12, v8
-; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vadd.vv v8, v12, v12
 ; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
   %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
@@ -1431,18 +1391,18 @@ define <8 x half> @vpgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vsext.vf4 v10, v8
-; RV32-NEXT:    vadd.vv v10, v10, v10
+; RV32-NEXT:    vadd.vv v8, v10, v10
 ; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf8 v12, v8
-; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vadd.vv v8, v12, v12
 ; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i8> %idxs to <8 x i16>
   %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
@@ -1485,9 +1445,9 @@ define <8 x half> @vpgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf4 v12, v8
-; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vadd.vv v8, v12, v12
 ; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
   %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
@@ -1506,8 +1466,7 @@ define <2 x float> @vpgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %ev
 ; RV64-LABEL: vpgather_v2f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <2 x float> @llvm.vp.gather.v2f32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
   ret <2 x float> %v
@@ -1525,8 +1484,7 @@ define <4 x float> @vpgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %ev
 ; RV64-LABEL: vpgather_v4f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
   ret <4 x float> %v
@@ -1542,8 +1500,7 @@ define <4 x float> @vpgather_truemask_v4f32(<4 x ptr> %ptrs, i32 zeroext %evl) {
 ; RV64-LABEL: vpgather_truemask_v4f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8
-; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
   ret <4 x float> %v
@@ -1561,8 +1518,7 @@ define <8 x float> @vpgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %ev
 ; RV64-LABEL: vpgather_v8f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
   ret <8 x float> %v
@@ -1582,9 +1538,9 @@ define <8 x float> @vpgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf8 v12, v8
-; RV64-NEXT:    vsll.vi v12, v12, 2
+; RV64-NEXT:    vsll.vi v8, v12, 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
   %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
@@ -1605,9 +1561,9 @@ define <8 x float> @vpgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs,
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf8 v12, v8
-; RV64-NEXT:    vsll.vi v12, v12, 2
+; RV64-NEXT:    vsll.vi v8, v12, 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i8> %idxs to <8 x i32>
   %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
@@ -1620,18 +1576,18 @@ define <8 x float> @vpgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs,
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 4
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vwmulu.vx v10, v8, a2
+; RV32-NEXT:    vwmulu.vx v9, v8, a2
 ; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
+; RV32-NEXT:    vluxei16.v v8, (a0), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 4
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vwmulu.vx v10, v8, a2
+; RV64-NEXT:    vwmulu.vx v9, v8, a2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
+; RV64-NEXT:    vluxei16.v v8, (a0), v9, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i8> %idxs to <8 x i32>
   %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
@@ -1653,9 +1609,9 @@ define <8 x float> @vpgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf4 v12, v8
-; RV64-NEXT:    vsll.vi v12, v12, 2
+; RV64-NEXT:    vsll.vi v8, v12, 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
   %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
@@ -1676,9 +1632,9 @@ define <8 x float> @vpgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vsext.vf4 v12, v8
-; RV64-NEXT:    vsll.vi v12, v12, 2
+; RV64-NEXT:    vsll.vi v8, v12, 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i16> %idxs to <8 x i32>
   %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
@@ -1757,8 +1713,8 @@ define <4 x double> @vpgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %e
 ; RV32-LABEL: vpgather_v4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmv1r.v v10, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v10, v0.t
+; RV32-NEXT:    vmv1r.v v9, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v4f64:
@@ -1774,8 +1730,8 @@ define <4 x double> @vpgather_truemask_v4f64(<4 x ptr> %ptrs, i32 zeroext %evl)
 ; RV32-LABEL: vpgather_truemask_v4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmv1r.v v10, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v10
+; RV32-NEXT:    vmv1r.v v9, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_v4f64:
@@ -1793,8 +1749,8 @@ define <8 x double> @vpgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %e
 ; RV32-LABEL: vpgather_v8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v12, v0.t
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_v8f64:
@@ -1811,9 +1767,9 @@ define <8 x double> @vpgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vsext.vf4 v10, v8
-; RV32-NEXT:    vsll.vi v12, v10, 3
+; RV32-NEXT:    vsll.vi v10, v10, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_v8i8_v8f64:
@@ -1834,9 +1790,9 @@ define <8 x double> @vpgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vsext.vf4 v10, v8
-; RV32-NEXT:    vsll.vi v12, v10, 3
+; RV32-NEXT:    vsll.vi v10, v10, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f64:
@@ -1858,18 +1814,18 @@ define <8 x double> @vpgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vwmulu.vx v12, v8, a2
+; RV32-NEXT:    vwmulu.vx v11, v8, a2
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei16.v v8, (a0), v11, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 8
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vwmulu.vx v12, v8, a2
+; RV64-NEXT:    vwmulu.vx v11, v8, a2
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei16.v v8, (a0), v11, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i8> %idxs to <8 x i64>
   %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
@@ -1882,9 +1838,9 @@ define <8 x double> @vpgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vwmulsu.vx v12, v8, a2
+; RV32-NEXT:    vwmulsu.vx v10, v8, a2
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_v8i16_v8f64:
@@ -1905,9 +1861,9 @@ define <8 x double> @vpgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idx
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vwmulsu.vx v12, v8, a2
+; RV32-NEXT:    vwmulsu.vx v10, v8, a2
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8f64:
@@ -1929,18 +1885,18 @@ define <8 x double> @vpgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idx
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vwmulu.vx v12, v8, a2
+; RV32-NEXT:    vwmulu.vx v10, v8, a2
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 8
 ; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vwmulu.vx v12, v8, a2
+; RV64-NEXT:    vwmulu.vx v10, v8, a2
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i16> %idxs to <8 x i64>
   %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
@@ -1952,9 +1908,9 @@ define <8 x double> @vpgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
 ; RV32-LABEL: vpgather_baseidx_v8i32_v8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vsll.vi v12, v8, 3
+; RV32-NEXT:    vsll.vi v10, v8, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_v8i32_v8f64:
@@ -1974,9 +1930,9 @@ define <8 x double> @vpgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idx
 ; RV32-LABEL: vpgather_baseidx_sext_v8i32_v8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vsll.vi v12, v8, 3
+; RV32-NEXT:    vsll.vi v10, v8, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_v8i32_v8f64:
@@ -1997,9 +1953,9 @@ define <8 x double> @vpgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idx
 ; RV32-LABEL: vpgather_baseidx_zext_v8i32_v8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vsll.vi v12, v8, 3
+; RV32-NEXT:    vsll.vi v10, v8, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_v8i32_v8f64:
@@ -2020,10 +1976,10 @@ define <8 x double> @vpgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
 ; RV32-LABEL: vpgather_baseidx_v8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
-; RV32-NEXT:    vsll.vi v12, v12, 3
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsll.vi v10, v8, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_v8f64:
@@ -2613,13 +2569,13 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x
 ; RV32-LABEL: vpgather_baseidx_v32f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wi v16, v16, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT:    vslideup.vi v16, v24, 16
+; RV32-NEXT:    vslideup.vi v8, v16, 16
 ; RV32-NEXT:    li a3, 16
-; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsll.vi v16, v8, 3
 ; RV32-NEXT:    mv a2, a1
 ; RV32-NEXT:    bltu a1, a3, .LBB104_2
 ; RV32-NEXT:  # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index 4299707c9a48c..16292772a5bac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -26,8 +26,8 @@ define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 ze
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    vid.v v10
-; RV64-NEXT:    vmsltu.vx v12, v10, a0
-; RV64-NEXT:    vmand.mm v9, v9, v12
+; RV64-NEXT:    vmsltu.vx v10, v10, a0
+; RV64-NEXT:    vmand.mm v9, v9, v10
 ; RV64-NEXT:    vmandn.mm v8, v8, v9
 ; RV64-NEXT:    vmand.mm v9, v0, v9
 ; RV64-NEXT:    vmor.mm v0, v9, v8
@@ -48,8 +48,8 @@ define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 ze
 ; RV64ZVFHMIN:       # %bb.0:
 ; RV64ZVFHMIN-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64ZVFHMIN-NEXT:    vid.v v10
-; RV64ZVFHMIN-NEXT:    vmsltu.vx v12, v10, a0
-; RV64ZVFHMIN-NEXT:    vmand.mm v9, v9, v12
+; RV64ZVFHMIN-NEXT:    vmsltu.vx v10, v10, a0
+; RV64ZVFHMIN-NEXT:    vmand.mm v9, v9, v10
 ; RV64ZVFHMIN-NEXT:    vmandn.mm v8, v8, v9
 ; RV64ZVFHMIN-NEXT:    vmand.mm v9, v0, v9
 ; RV64ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
@@ -63,8 +63,8 @@ define <8 x i1> @vpmerge_vv_v8i1(<8 x i1> %va, <8 x i1> %vb, <8 x i1> %m, i32 ze
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vid.v v10
-; RV32-NEXT:    vmsltu.vx v12, v10, a0
-; RV32-NEXT:    vmand.mm v9, v9, v12
+; RV32-NEXT:    vmsltu.vx v10, v10, a0
+; RV32-NEXT:    vmand.mm v9, v9, v10
 ; RV32-NEXT:    vmandn.mm v8, v8, v9
 ; RV32-NEXT:    vmand.mm v9, v0, v9
 ; RV32-NEXT:    vmor.mm v0, v9, v8
@@ -85,8 +85,8 @@ define <8 x i1> @vpmerge_vv_v8i1(<8 x i1> %va, <8 x i1> %vb, <8 x i1> %m, i32 ze
 ; RV32ZVFHMIN:       # %bb.0:
 ; RV32ZVFHMIN-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32ZVFHMIN-NEXT:    vid.v v10
-; RV32ZVFHMIN-NEXT:    vmsltu.vx v12, v10, a0
-; RV32ZVFHMIN-NEXT:    vmand.mm v9, v9, v12
+; RV32ZVFHMIN-NEXT:    vmsltu.vx v10, v10, a0
+; RV32ZVFHMIN-NEXT:    vmand.mm v9, v9, v10
 ; RV32ZVFHMIN-NEXT:    vmandn.mm v8, v8, v9
 ; RV32ZVFHMIN-NEXT:    vmand.mm v9, v0, v9
 ; RV32ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
index c361ccce14e4a..33810b434287a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
@@ -954,8 +954,8 @@ define void @vpscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
 ; RV32-LABEL: vpscatter_baseidx_v8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
-; RV32-NEXT:    vsll.vi v12, v16, 3
+; RV32-NEXT:    vnsrl.wi v12, v12, 0
+; RV32-NEXT:    vsll.vi v12, v12, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1721,8 +1721,8 @@ define void @vpscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %id
 ; RV32-LABEL: vpscatter_baseidx_v8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
-; RV32-NEXT:    vsll.vi v12, v16, 3
+; RV32-NEXT:    vnsrl.wi v12, v12, 0
+; RV32-NEXT:    vsll.vi v12, v12, 3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll
index 32ae81926bbee..d600eca589dbd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll
@@ -1016,8 +1016,8 @@ define <4 x i64> @vrol_vx_v4i64(<4 x i64> %a, i64 %b) {
 ; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v14, 0
-; RV32-NEXT:    vwsub.vx v12, v14, a0
+; RV32-NEXT:    vmv.v.i v13, 0
+; RV32-NEXT:    vwsub.vx v12, v13, a0
 ; RV32-NEXT:    li a0, 63
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vx v10, v10, a0
@@ -1081,8 +1081,8 @@ define <8 x i64> @vrol_vx_v8i64(<8 x i64> %a, i64 %b) {
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.i v20, 0
-; RV32-NEXT:    vwsub.vx v16, v20, a0
+; RV32-NEXT:    vmv.v.i v18, 0
+; RV32-NEXT:    vwsub.vx v16, v18, a0
 ; RV32-NEXT:    li a0, 63
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vx v12, v12, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll
index ec22d2be1eaad..b4835919935fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll
@@ -1833,8 +1833,8 @@ define <4 x i64> @vror_vx_v4i64(<4 x i64> %a, i64 %b) {
 ; CHECK-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-RV32-NEXT:    vmv.v.x v10, a0
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-RV32-NEXT:    vmv.v.i v14, 0
-; CHECK-RV32-NEXT:    vwsub.vx v12, v14, a0
+; CHECK-RV32-NEXT:    vmv.v.i v13, 0
+; CHECK-RV32-NEXT:    vwsub.vx v12, v13, a0
 ; CHECK-RV32-NEXT:    li a0, 63
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-RV32-NEXT:    vand.vx v10, v10, a0
@@ -1872,9 +1872,9 @@ define <4 x i64> @vror_vi_v4i64(<4 x i64> %a) {
 ; CHECK-RV32-LABEL: vror_vi_v4i64:
 ; CHECK-RV32:       # %bb.0:
 ; CHECK-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-RV32-NEXT:    vmv.v.i v12, 0
+; CHECK-RV32-NEXT:    vmv.v.i v11, 0
 ; CHECK-RV32-NEXT:    li a0, 1
-; CHECK-RV32-NEXT:    vwsubu.vx v10, v12, a0
+; CHECK-RV32-NEXT:    vwsubu.vx v10, v11, a0
 ; CHECK-RV32-NEXT:    li a0, 63
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-RV32-NEXT:    vmv.v.x v12, a0
@@ -1907,9 +1907,9 @@ define <4 x i64> @vror_vi_rotl_v4i64(<4 x i64> %a) {
 ; CHECK-RV32-LABEL: vror_vi_rotl_v4i64:
 ; CHECK-RV32:       # %bb.0:
 ; CHECK-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-RV32-NEXT:    vmv.v.i v12, 0
+; CHECK-RV32-NEXT:    vmv.v.i v11, 0
 ; CHECK-RV32-NEXT:    li a0, 1
-; CHECK-RV32-NEXT:    vwsubu.vx v10, v12, a0
+; CHECK-RV32-NEXT:    vwsubu.vx v10, v11, a0
 ; CHECK-RV32-NEXT:    li a0, 63
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-RV32-NEXT:    vmv.v.x v12, a0
@@ -1969,8 +1969,8 @@ define <8 x i64> @vror_vx_v8i64(<8 x i64> %a, i64 %b) {
 ; CHECK-RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-RV32-NEXT:    vmv.v.x v12, a0
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-RV32-NEXT:    vmv.v.i v20, 0
-; CHECK-RV32-NEXT:    vwsub.vx v16, v20, a0
+; CHECK-RV32-NEXT:    vmv.v.i v18, 0
+; CHECK-RV32-NEXT:    vwsub.vx v16, v18, a0
 ; CHECK-RV32-NEXT:    li a0, 63
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-RV32-NEXT:    vand.vx v12, v12, a0
@@ -2008,9 +2008,9 @@ define <8 x i64> @vror_vi_v8i64(<8 x i64> %a) {
 ; CHECK-RV32-LABEL: vror_vi_v8i64:
 ; CHECK-RV32:       # %bb.0:
 ; CHECK-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-RV32-NEXT:    vmv.v.i v16, 0
+; CHECK-RV32-NEXT:    vmv.v.i v14, 0
 ; CHECK-RV32-NEXT:    li a0, 1
-; CHECK-RV32-NEXT:    vwsubu.vx v12, v16, a0
+; CHECK-RV32-NEXT:    vwsubu.vx v12, v14, a0
 ; CHECK-RV32-NEXT:    li a0, 63
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-RV32-NEXT:    vmv.v.x v16, a0
@@ -2043,9 +2043,9 @@ define <8 x i64> @vror_vi_rotl_v8i64(<8 x i64> %a) {
 ; CHECK-RV32-LABEL: vror_vi_rotl_v8i64:
 ; CHECK-RV32:       # %bb.0:
 ; CHECK-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-RV32-NEXT:    vmv.v.i v16, 0
+; CHECK-RV32-NEXT:    vmv.v.i v14, 0
 ; CHECK-RV32-NEXT:    li a0, 1
-; CHECK-RV32-NEXT:    vwsubu.vx v12, v16, a0
+; CHECK-RV32-NEXT:    vwsubu.vx v12, v14, a0
 ; CHECK-RV32-NEXT:    li a0, 63
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-RV32-NEXT:    vmv.v.x v16, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll
index 7bac239cfffea..9cb0cb52930c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll
@@ -41,13 +41,12 @@ define <8 x i64> @vwaddu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
 define <8 x i64> @vwaddu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; CHECK-LABEL: vwaddu_vv_mask_v8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
 ; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmslt.vx v0, v8, a0
-; CHECK-NEXT:    vmv.v.i v10, 0
-; CHECK-NEXT:    vmerge.vvm v14, v10, v8, v0
-; CHECK-NEXT:    vwaddu.vv v8, v14, v12
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vwaddu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
     %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
     %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
@@ -82,8 +81,8 @@ define <8 x i64> @vwadd_wv_mask_v8i32_nonzero(<8 x i32> %x, <8 x i64> %y) {
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmslt.vx v0, v8, a0
 ; CHECK-NEXT:    vmv.v.i v10, 1
-; CHECK-NEXT:    vmerge.vvm v16, v10, v8, v0
-; CHECK-NEXT:    vwadd.wv v8, v12, v16
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vwadd.wv v8, v12, v10
 ; CHECK-NEXT:    ret
     %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
     %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
index b39fff64b1090..4b19284393359 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
@@ -102,9 +102,9 @@ define <16 x i16> @vwadd_v16i16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwadd_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vle8.v v11, (a1)
-; CHECK-NEXT:    vwadd.vv v8, v10, v11
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vwadd.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = load <16 x i8>, ptr %y
@@ -118,9 +118,9 @@ define <8 x i32> @vwadd_v8i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwadd_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vle16.v v11, (a1)
-; CHECK-NEXT:    vwadd.vv v8, v10, v11
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vwadd.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = load <8 x i16>, ptr %y
@@ -134,9 +134,9 @@ define <4 x i64> @vwadd_v4i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwadd_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vle32.v v11, (a1)
-; CHECK-NEXT:    vwadd.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vwadd.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i32>, ptr %y
@@ -151,9 +151,9 @@ define <32 x i16> @vwadd_v32i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vle8.v v14, (a1)
-; CHECK-NEXT:    vwadd.vv v8, v12, v14
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vle8.v v12, (a1)
+; CHECK-NEXT:    vwadd.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
@@ -167,9 +167,9 @@ define <16 x i32> @vwadd_v16i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwadd_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
-; CHECK-NEXT:    vwadd.vv v8, v12, v14
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v12, (a1)
+; CHECK-NEXT:    vwadd.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
@@ -183,9 +183,9 @@ define <8 x  i64> @vwadd_v8i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwadd_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vle32.v v14, (a1)
-; CHECK-NEXT:    vwadd.vv v8, v12, v14
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vle32.v v12, (a1)
+; CHECK-NEXT:    vwadd.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <8 x  i32>, ptr %x
   %b = load <8 x  i32>, ptr %y
@@ -200,9 +200,9 @@ define <64 x i16> @vwadd_v64i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vle8.v v20, (a1)
-; CHECK-NEXT:    vwadd.vv v8, v16, v20
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vle8.v v16, (a1)
+; CHECK-NEXT:    vwadd.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = load <64 x i8>, ptr %y
@@ -217,9 +217,9 @@ define <32 x i32> @vwadd_v32i32(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vle16.v v20, (a1)
-; CHECK-NEXT:    vwadd.vv v8, v16, v20
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v16, (a1)
+; CHECK-NEXT:    vwadd.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = load <32 x i16>, ptr %y
@@ -233,9 +233,9 @@ define <16 x i64> @vwadd_v16i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwadd_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vle32.v v20, (a1)
-; CHECK-NEXT:    vwadd.vv v8, v16, v20
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    vwadd.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = load <16 x i32>, ptr %y
@@ -388,9 +388,9 @@ define <4 x i64> @vwadd_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vsext.vf4 v11, v8
-; CHECK-NEXT:    vwadd.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vwadd.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i8>, ptr %y
@@ -500,8 +500,8 @@ define <16 x i16> @vwadd_vx_v16i16(ptr %x, i8 %y) {
 ; CHECK-LABEL: vwadd_vx_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vwadd.vx v8, v10, a1
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vwadd.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = insertelement <16 x i8> poison, i8 %y, i32 0
@@ -516,8 +516,8 @@ define <8 x i32> @vwadd_vx_v8i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwadd_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vwadd.vx v8, v10, a1
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vwadd.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = insertelement <8 x i16> poison, i16 %y, i32 0
@@ -532,8 +532,8 @@ define <4 x i64> @vwadd_vx_v4i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwadd_vx_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vwadd.vx v8, v10, a1
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vwadd.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = insertelement <4 x i32> poison, i32 %y, i64 0
@@ -549,8 +549,8 @@ define <32 x i16> @vwadd_vx_v32i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vwadd.vx v8, v12, a1
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vwadd.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -565,8 +565,8 @@ define <16 x i32> @vwadd_vx_v16i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwadd_vx_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vwadd.vx v8, v12, a1
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vwadd.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = insertelement <16 x i16> poison, i16 %y, i32 0
@@ -581,8 +581,8 @@ define <8 x i64> @vwadd_vx_v8i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwadd_vx_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vwadd.vx v8, v12, a1
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vwadd.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = insertelement <8 x i32> poison, i32 %y, i64 0
@@ -598,8 +598,8 @@ define <64 x i16> @vwadd_vx_v64i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vwadd.vx v8, v16, a1
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vwadd.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -615,8 +615,8 @@ define <32 x i32> @vwadd_vx_v32i32(ptr %x, i16 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vwadd.vx v8, v16, a1
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vwadd.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = insertelement <32 x i16> poison, i16 %y, i32 0
@@ -631,8 +631,8 @@ define <16 x i64> @vwadd_vx_v16i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwadd_vx_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vwadd.vx v8, v16, a1
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vwadd.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = insertelement <16 x i32> poison, i32 %y, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
index 1a716f688dd59..5e6fbb005c91b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
@@ -102,9 +102,9 @@ define <16 x i16> @vwaddu_v16i16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwaddu_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vle8.v v11, (a1)
-; CHECK-NEXT:    vwaddu.vv v8, v10, v11
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vwaddu.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = load <16 x i8>, ptr %y
@@ -118,9 +118,9 @@ define <8 x i32> @vwaddu_v8i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwaddu_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vle16.v v11, (a1)
-; CHECK-NEXT:    vwaddu.vv v8, v10, v11
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vwaddu.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = load <8 x i16>, ptr %y
@@ -134,9 +134,9 @@ define <4 x i64> @vwaddu_v4i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwaddu_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vle32.v v11, (a1)
-; CHECK-NEXT:    vwaddu.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vwaddu.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i32>, ptr %y
@@ -151,9 +151,9 @@ define <32 x i16> @vwaddu_v32i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vle8.v v14, (a1)
-; CHECK-NEXT:    vwaddu.vv v8, v12, v14
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vle8.v v12, (a1)
+; CHECK-NEXT:    vwaddu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
@@ -167,9 +167,9 @@ define <16 x i32> @vwaddu_v16i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwaddu_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
-; CHECK-NEXT:    vwaddu.vv v8, v12, v14
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v12, (a1)
+; CHECK-NEXT:    vwaddu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
@@ -183,9 +183,9 @@ define <8 x  i64> @vwaddu_v8i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwaddu_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vle32.v v14, (a1)
-; CHECK-NEXT:    vwaddu.vv v8, v12, v14
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vle32.v v12, (a1)
+; CHECK-NEXT:    vwaddu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <8 x  i32>, ptr %x
   %b = load <8 x  i32>, ptr %y
@@ -200,9 +200,9 @@ define <64 x i16> @vwaddu_v64i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vle8.v v20, (a1)
-; CHECK-NEXT:    vwaddu.vv v8, v16, v20
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vle8.v v16, (a1)
+; CHECK-NEXT:    vwaddu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = load <64 x i8>, ptr %y
@@ -217,9 +217,9 @@ define <32 x i32> @vwaddu_v32i32(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vle16.v v20, (a1)
-; CHECK-NEXT:    vwaddu.vv v8, v16, v20
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v16, (a1)
+; CHECK-NEXT:    vwaddu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = load <32 x i16>, ptr %y
@@ -233,9 +233,9 @@ define <16 x i64> @vwaddu_v16i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwaddu_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vle32.v v20, (a1)
-; CHECK-NEXT:    vwaddu.vv v8, v16, v20
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    vwaddu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = load <16 x i32>, ptr %y
@@ -388,9 +388,9 @@ define <4 x i64> @vwaddu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vzext.vf4 v11, v8
-; CHECK-NEXT:    vwaddu.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vwaddu.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i8>, ptr %y
@@ -500,8 +500,8 @@ define <16 x i16> @vwaddu_vx_v16i16(ptr %x, i8 %y) {
 ; CHECK-LABEL: vwaddu_vx_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vwaddu.vx v8, v10, a1
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vwaddu.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = insertelement <16 x i8> poison, i8 %y, i32 0
@@ -516,8 +516,8 @@ define <8 x i32> @vwaddu_vx_v8i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwaddu_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vwaddu.vx v8, v10, a1
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vwaddu.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = insertelement <8 x i16> poison, i16 %y, i32 0
@@ -532,8 +532,8 @@ define <4 x i64> @vwaddu_vx_v4i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwaddu_vx_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vwaddu.vx v8, v10, a1
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vwaddu.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = insertelement <4 x i32> poison, i32 %y, i64 0
@@ -549,8 +549,8 @@ define <32 x i16> @vwaddu_vx_v32i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vwaddu.vx v8, v12, a1
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vwaddu.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -565,8 +565,8 @@ define <16 x i32> @vwaddu_vx_v16i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwaddu_vx_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vwaddu.vx v8, v12, a1
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vwaddu.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = insertelement <16 x i16> poison, i16 %y, i32 0
@@ -581,8 +581,8 @@ define <8 x i64> @vwaddu_vx_v8i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwaddu_vx_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vwaddu.vx v8, v12, a1
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vwaddu.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = insertelement <8 x i32> poison, i32 %y, i64 0
@@ -598,8 +598,8 @@ define <64 x i16> @vwaddu_vx_v64i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vwaddu.vx v8, v16, a1
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vwaddu.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -615,8 +615,8 @@ define <32 x i32> @vwaddu_vx_v32i32(ptr %x, i16 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vwaddu.vx v8, v16, a1
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vwaddu.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = insertelement <32 x i16> poison, i16 %y, i32 0
@@ -631,8 +631,8 @@ define <16 x i64> @vwaddu_vx_v16i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwaddu_vx_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vwaddu.vx v8, v16, a1
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vwaddu.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = insertelement <16 x i32> poison, i32 %y, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
index 94c3138fd330b..81620805f69cc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
@@ -126,9 +126,9 @@ define <16 x i16> @vwmul_v16i16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmul_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vle8.v v11, (a1)
-; CHECK-NEXT:    vwmul.vv v8, v10, v11
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vwmul.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = load <16 x i8>, ptr %y
@@ -142,9 +142,9 @@ define <8 x i32> @vwmul_v8i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmul_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vle16.v v11, (a1)
-; CHECK-NEXT:    vwmul.vv v8, v10, v11
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vwmul.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = load <8 x i16>, ptr %y
@@ -158,9 +158,9 @@ define <4 x i64> @vwmul_v4i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmul_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vle32.v v11, (a1)
-; CHECK-NEXT:    vwmul.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vwmul.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i32>, ptr %y
@@ -175,9 +175,9 @@ define <32 x i16> @vwmul_v32i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vle8.v v14, (a1)
-; CHECK-NEXT:    vwmul.vv v8, v12, v14
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vle8.v v12, (a1)
+; CHECK-NEXT:    vwmul.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
@@ -191,9 +191,9 @@ define <16 x i32> @vwmul_v16i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmul_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
-; CHECK-NEXT:    vwmul.vv v8, v12, v14
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v12, (a1)
+; CHECK-NEXT:    vwmul.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
@@ -207,9 +207,9 @@ define <8 x  i64> @vwmul_v8i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmul_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vle32.v v14, (a1)
-; CHECK-NEXT:    vwmul.vv v8, v12, v14
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vle32.v v12, (a1)
+; CHECK-NEXT:    vwmul.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <8 x  i32>, ptr %x
   %b = load <8 x  i32>, ptr %y
@@ -224,9 +224,9 @@ define <64 x i16> @vwmul_v64i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vle8.v v20, (a1)
-; CHECK-NEXT:    vwmul.vv v8, v16, v20
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vle8.v v16, (a1)
+; CHECK-NEXT:    vwmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = load <64 x i8>, ptr %y
@@ -241,9 +241,9 @@ define <32 x i32> @vwmul_v32i32(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vle16.v v20, (a1)
-; CHECK-NEXT:    vwmul.vv v8, v16, v20
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v16, (a1)
+; CHECK-NEXT:    vwmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = load <32 x i16>, ptr %y
@@ -257,9 +257,9 @@ define <16 x i64> @vwmul_v16i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmul_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vle32.v v20, (a1)
-; CHECK-NEXT:    vwmul.vv v8, v16, v20
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    vwmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = load <16 x i32>, ptr %y
@@ -424,9 +424,9 @@ define <4 x i64> @vwmul_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vsext.vf4 v11, v8
-; CHECK-NEXT:    vwmul.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vwmul.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i8>, ptr %y
@@ -536,8 +536,8 @@ define <16 x i16> @vwmul_vx_v16i16(ptr %x, i8 %y) {
 ; CHECK-LABEL: vwmul_vx_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vwmul.vx v8, v10, a1
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vwmul.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = insertelement <16 x i8> poison, i8 %y, i32 0
@@ -552,8 +552,8 @@ define <8 x i32> @vwmul_vx_v8i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwmul_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vwmul.vx v8, v10, a1
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vwmul.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = insertelement <8 x i16> poison, i16 %y, i32 0
@@ -568,8 +568,8 @@ define <4 x i64> @vwmul_vx_v4i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwmul_vx_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vwmul.vx v8, v10, a1
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vwmul.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = insertelement <4 x i32> poison, i32 %y, i64 0
@@ -585,8 +585,8 @@ define <32 x i16> @vwmul_vx_v32i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vwmul.vx v8, v12, a1
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vwmul.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -601,8 +601,8 @@ define <16 x i32> @vwmul_vx_v16i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwmul_vx_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vwmul.vx v8, v12, a1
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vwmul.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = insertelement <16 x i16> poison, i16 %y, i32 0
@@ -617,8 +617,8 @@ define <8 x i64> @vwmul_vx_v8i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwmul_vx_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vwmul.vx v8, v12, a1
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vwmul.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = insertelement <8 x i32> poison, i32 %y, i64 0
@@ -634,8 +634,8 @@ define <64 x i16> @vwmul_vx_v64i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vwmul.vx v8, v16, a1
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vwmul.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -651,8 +651,8 @@ define <32 x i32> @vwmul_vx_v32i32(ptr %x, i16 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vwmul.vx v8, v16, a1
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vwmul.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = insertelement <32 x i16> poison, i16 %y, i32 0
@@ -667,8 +667,8 @@ define <16 x i64> @vwmul_vx_v16i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwmul_vx_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vwmul.vx v8, v16, a1
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vwmul.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = insertelement <16 x i32> poison, i32 %y, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
index 8ebd93e9dc637..a015689672351 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
@@ -118,9 +118,9 @@ define <16 x i16> @vwmulsu_v16i16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulsu_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vle8.v v11, (a1)
-; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = load <16 x i8>, ptr %y
@@ -134,9 +134,9 @@ define <8 x i32> @vwmulsu_v8i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulsu_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vle16.v v11, (a1)
-; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = load <8 x i16>, ptr %y
@@ -150,9 +150,9 @@ define <4 x i64> @vwmulsu_v4i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulsu_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vle32.v v11, (a1)
-; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i32>, ptr %y
@@ -167,9 +167,9 @@ define <32 x i16> @vwmulsu_v32i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vle8.v v14, (a1)
-; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vle8.v v12, (a1)
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
@@ -183,9 +183,9 @@ define <16 x i32> @vwmulsu_v16i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulsu_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
-; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v12, (a1)
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
@@ -199,9 +199,9 @@ define <8 x  i64> @vwmulsu_v8i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulsu_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vle32.v v14, (a1)
-; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vle32.v v12, (a1)
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %a = load <8 x  i32>, ptr %x
   %b = load <8 x  i32>, ptr %y
@@ -216,9 +216,9 @@ define <64 x i16> @vwmulsu_v64i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vle8.v v20, (a1)
-; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vle8.v v16, (a1)
+; CHECK-NEXT:    vwmulsu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = load <64 x i8>, ptr %y
@@ -233,9 +233,9 @@ define <32 x i32> @vwmulsu_v32i32(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vle16.v v20, (a1)
-; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v16, (a1)
+; CHECK-NEXT:    vwmulsu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = load <32 x i16>, ptr %y
@@ -249,9 +249,9 @@ define <16 x i64> @vwmulsu_v16i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulsu_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vle32.v v20, (a1)
-; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    vwmulsu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = load <16 x i32>, ptr %y
@@ -416,9 +416,9 @@ define <4 x i64> @vwmulsu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vsext.vf4 v11, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i8>, ptr %y
@@ -545,8 +545,8 @@ define <16 x i16> @vwmulsu_vx_v16i16(ptr %x, i8 %y) {
 ; CHECK-LABEL: vwmulsu_vx_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = insertelement <16 x i8> poison, i8 %y, i32 0
@@ -561,8 +561,8 @@ define <8 x i32> @vwmulsu_vx_v8i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwmulsu_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = insertelement <8 x i16> poison, i16 %y, i32 0
@@ -577,8 +577,8 @@ define <4 x i64> @vwmulsu_vx_v4i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwmulsu_vx_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = insertelement <4 x i32> poison, i32 %y, i64 0
@@ -594,8 +594,8 @@ define <32 x i16> @vwmulsu_vx_v32i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -610,8 +610,8 @@ define <16 x i32> @vwmulsu_vx_v16i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwmulsu_vx_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = insertelement <16 x i16> poison, i16 %y, i32 0
@@ -626,8 +626,8 @@ define <8 x i64> @vwmulsu_vx_v8i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwmulsu_vx_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = insertelement <8 x i32> poison, i32 %y, i64 0
@@ -643,8 +643,8 @@ define <64 x i16> @vwmulsu_vx_v64i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -660,8 +660,8 @@ define <32 x i32> @vwmulsu_vx_v32i32(ptr %x, i16 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = insertelement <32 x i16> poison, i16 %y, i32 0
@@ -676,8 +676,8 @@ define <16 x i64> @vwmulsu_vx_v16i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwmulsu_vx_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = insertelement <16 x i32> poison, i32 %y, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
index 90e9ffdcb320a..b59b9ff65afda 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
@@ -102,9 +102,9 @@ define <16 x i16> @vwmulu_v16i16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulu_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vle8.v v11, (a1)
-; CHECK-NEXT:    vwmulu.vv v8, v10, v11
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vwmulu.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = load <16 x i8>, ptr %y
@@ -118,9 +118,9 @@ define <8 x i32> @vwmulu_v8i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulu_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vle16.v v11, (a1)
-; CHECK-NEXT:    vwmulu.vv v8, v10, v11
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vwmulu.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = load <8 x i16>, ptr %y
@@ -134,9 +134,9 @@ define <4 x i64> @vwmulu_v4i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulu_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vle32.v v11, (a1)
-; CHECK-NEXT:    vwmulu.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vwmulu.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i32>, ptr %y
@@ -151,9 +151,9 @@ define <32 x i16> @vwmulu_v32i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vle8.v v14, (a1)
-; CHECK-NEXT:    vwmulu.vv v8, v12, v14
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vle8.v v12, (a1)
+; CHECK-NEXT:    vwmulu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
@@ -167,9 +167,9 @@ define <16 x i32> @vwmulu_v16i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulu_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
-; CHECK-NEXT:    vwmulu.vv v8, v12, v14
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v12, (a1)
+; CHECK-NEXT:    vwmulu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
@@ -183,9 +183,9 @@ define <8 x  i64> @vwmulu_v8i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulu_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vle32.v v14, (a1)
-; CHECK-NEXT:    vwmulu.vv v8, v12, v14
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vle32.v v12, (a1)
+; CHECK-NEXT:    vwmulu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <8 x  i32>, ptr %x
   %b = load <8 x  i32>, ptr %y
@@ -200,9 +200,9 @@ define <64 x i16> @vwmulu_v64i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vle8.v v20, (a1)
-; CHECK-NEXT:    vwmulu.vv v8, v16, v20
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vle8.v v16, (a1)
+; CHECK-NEXT:    vwmulu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = load <64 x i8>, ptr %y
@@ -217,9 +217,9 @@ define <32 x i32> @vwmulu_v32i32(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vle16.v v20, (a1)
-; CHECK-NEXT:    vwmulu.vv v8, v16, v20
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v16, (a1)
+; CHECK-NEXT:    vwmulu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = load <32 x i16>, ptr %y
@@ -233,9 +233,9 @@ define <16 x i64> @vwmulu_v16i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulu_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vle32.v v20, (a1)
-; CHECK-NEXT:    vwmulu.vv v8, v16, v20
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    vwmulu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = load <16 x i32>, ptr %y
@@ -400,9 +400,9 @@ define <4 x i64> @vwmulu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vzext.vf4 v11, v8
-; CHECK-NEXT:    vwmulu.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vwmulu.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i8>, ptr %y
@@ -512,8 +512,8 @@ define <16 x i16> @vwmulu_vx_v16i16(ptr %x, i8 %y) {
 ; CHECK-LABEL: vwmulu_vx_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vwmulu.vx v8, v10, a1
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vwmulu.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = insertelement <16 x i8> poison, i8 %y, i32 0
@@ -528,8 +528,8 @@ define <8 x i32> @vwmulu_vx_v8i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwmulu_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vwmulu.vx v8, v10, a1
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vwmulu.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = insertelement <8 x i16> poison, i16 %y, i32 0
@@ -544,8 +544,8 @@ define <4 x i64> @vwmulu_vx_v4i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwmulu_vx_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vwmulu.vx v8, v10, a1
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vwmulu.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = insertelement <4 x i32> poison, i32 %y, i64 0
@@ -561,8 +561,8 @@ define <32 x i16> @vwmulu_vx_v32i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vwmulu.vx v8, v12, a1
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vwmulu.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -577,8 +577,8 @@ define <16 x i32> @vwmulu_vx_v16i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwmulu_vx_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vwmulu.vx v8, v12, a1
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vwmulu.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = insertelement <16 x i16> poison, i16 %y, i32 0
@@ -593,8 +593,8 @@ define <8 x i64> @vwmulu_vx_v8i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwmulu_vx_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vwmulu.vx v8, v12, a1
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vwmulu.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = insertelement <8 x i32> poison, i32 %y, i64 0
@@ -610,8 +610,8 @@ define <64 x i16> @vwmulu_vx_v64i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vwmulu.vx v8, v16, a1
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vwmulu.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -627,8 +627,8 @@ define <32 x i32> @vwmulu_vx_v32i32(ptr %x, i16 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vwmulu.vx v8, v16, a1
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vwmulu.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = insertelement <32 x i16> poison, i16 %y, i32 0
@@ -643,8 +643,8 @@ define <16 x i64> @vwmulu_vx_v16i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwmulu_vx_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vwmulu.vx v8, v16, a1
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vwmulu.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = insertelement <16 x i32> poison, i32 %y, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
index 2c9aed6274dd8..ce87e7f64391e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
@@ -13,16 +13,15 @@ define <4 x i64> @vwsll_vv_v4i64_sext(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <4 x i32> %a to <4 x i64>
   %y = sext <4 x i32> %b to <4 x i64>
@@ -35,16 +34,15 @@ define <4 x i64> @vwsll_vv_v4i64_zext(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <4 x i32> %a to <4 x i64>
   %y = zext <4 x i32> %b to <4 x i64>
@@ -63,8 +61,8 @@ define <4 x i64> @vwsll_vx_i64_v4i64(<4 x i32> %a, i64 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i64_v4i64:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> zeroinitializer
@@ -77,19 +75,19 @@ define <4 x i64> @vwsll_vx_i32_v4i64_sext(<4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: vwsll_vx_i32_v4i64_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vmv.v.x v11, a0
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v11
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vmv.v.x v10, a0
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v9, v10
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -103,18 +101,18 @@ define <4 x i64> @vwsll_vx_i32_v4i64_zext(<4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: vwsll_vx_i32_v4i64_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -138,8 +136,8 @@ define <4 x i64> @vwsll_vx_i16_v4i64_sext(<4 x i32> %a, i16 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -163,8 +161,8 @@ define <4 x i64> @vwsll_vx_i16_v4i64_zext(<4 x i32> %a, i16 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -188,8 +186,8 @@ define <4 x i64> @vwsll_vx_i8_v4i64_sext(<4 x i32> %a, i8 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer
@@ -213,8 +211,8 @@ define <4 x i64> @vwsll_vx_i8_v4i64_zext(<4 x i32> %a, i8 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer
@@ -228,16 +226,16 @@ define <4 x i64> @vwsll_vi_v4i64(<4 x i32> %a) {
 ; CHECK-LABEL: vwsll_vi_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_v4i64:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vi v8, v9, 2
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <4 x i32> %a to <4 x i64>
   %z = shl <4 x i64> %x, splat (i64 2)
@@ -253,16 +251,15 @@ define <8 x i32> @vwsll_vv_v8i32_sext(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_v8i32_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <8 x i16> %a to <8 x i32>
   %y = sext <8 x i16> %b to <8 x i32>
@@ -275,16 +272,15 @@ define <8 x i32> @vwsll_vv_v8i32_zext(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_v8i32_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <8 x i16> %a to <8 x i32>
   %y = zext <8 x i16> %b to <8 x i32>
@@ -303,8 +299,8 @@ define <8 x i32> @vwsll_vx_i64_v8i32(<8 x i16> %a, i64 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i64_v8i32:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <8 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <8 x i64> %head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -325,8 +321,8 @@ define <8 x i32> @vwsll_vx_i32_v8i32(<8 x i16> %a, i32 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v8i32:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <8 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <8 x i32> %head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -339,19 +335,19 @@ define <8 x i32> @vwsll_vx_i16_v8i32_sext(<8 x i16> %a, i16 %b) {
 ; CHECK-LABEL: vwsll_vx_i16_v8i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v8i32_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vmv.v.x v11, a0
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v11
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vmv.v.x v10, a0
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v9, v10
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <8 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -365,18 +361,18 @@ define <8 x i32> @vwsll_vx_i16_v8i32_zext(<8 x i16> %a, i16 %b) {
 ; CHECK-LABEL: vwsll_vx_i16_v8i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v8i32_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <8 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -400,8 +396,8 @@ define <8 x i32> @vwsll_vx_i8_v8i32_sext(<8 x i16> %a, i8 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v8i32_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <8 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer
@@ -425,8 +421,8 @@ define <8 x i32> @vwsll_vx_i8_v8i32_zext(<8 x i16> %a, i8 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v8i32_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <8 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer
@@ -440,16 +436,16 @@ define <8 x i32> @vwsll_vi_v8i32(<8 x i16> %a) {
 ; CHECK-LABEL: vwsll_vi_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_v8i32:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vi v8, v9, 2
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <8 x i16> %a to <8 x i32>
   %z = shl <8 x i32> %x, splat (i32 2)
@@ -465,16 +461,15 @@ define <16 x i16> @vwsll_vv_v16i16_sext(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_v16i16_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <16 x i8> %a to <16 x i16>
   %y = sext <16 x i8> %b to <16 x i16>
@@ -487,16 +482,15 @@ define <16 x i16> @vwsll_vv_v16i16_zext(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_v16i16_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <16 x i8> %a to <16 x i16>
   %y = zext <16 x i8> %b to <16 x i16>
@@ -516,8 +510,8 @@ define <16 x i16> @vwsll_vx_i64_v16i16(<16 x i8> %a, i64 %b) {
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v24, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT:    vnsrl.wi v14, v8, 0
-; RV32-NEXT:    vsll.vv v8, v12, v14
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsll.vv v8, v12, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vwsll_vx_i64_v16i16:
@@ -529,8 +523,8 @@ define <16 x i16> @vwsll_vx_i64_v16i16(<16 x i8> %a, i64 %b) {
 ; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v16, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT:    vnsrl.wi v14, v8, 0
-; RV64-NEXT:    vsll.vv v8, v12, v14
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vsll.vv v8, v12, v8
 ; RV64-NEXT:    ret
 ;
 ; CHECK-ZVBB-RV32-LABEL: vwsll_vx_i64_v16i16:
@@ -544,15 +538,15 @@ define <16 x i16> @vwsll_vx_i64_v16i16(<16 x i8> %a, i64 %b) {
 ; CHECK-ZVBB-RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-ZVBB-RV32-NEXT:    vnsrl.wi v8, v24, 0
 ; CHECK-ZVBB-RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-ZVBB-RV32-NEXT:    vnsrl.wi v14, v8, 0
-; CHECK-ZVBB-RV32-NEXT:    vsll.vv v8, v12, v14
+; CHECK-ZVBB-RV32-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-ZVBB-RV32-NEXT:    vsll.vv v8, v12, v8
 ; CHECK-ZVBB-RV32-NEXT:    ret
 ;
 ; CHECK-ZVBB-RV64-LABEL: vwsll_vx_i64_v16i16:
 ; CHECK-ZVBB-RV64:       # %bb.0:
 ; CHECK-ZVBB-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-RV64-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-RV64-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-RV64-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-RV64-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-RV64-NEXT:    ret
   %head = insertelement <8 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <8 x i64> %head, <8 x i64> poison, <16 x i32> zeroinitializer
@@ -576,8 +570,8 @@ define <16 x i16> @vwsll_vx_i32_v16i16(<16 x i8> %a, i32 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v16i16:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <16 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <16 x i32> %head, <16 x i32> poison, <16 x i32> zeroinitializer
@@ -598,8 +592,8 @@ define <16 x i16> @vwsll_vx_i16_v16i16(<16 x i8> %a, i16 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v16i16:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <16 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <16 x i16> %head, <16 x i16> poison, <16 x i32> zeroinitializer
@@ -612,19 +606,19 @@ define <16 x i16> @vwsll_vx_i8_v16i16_sext(<16 x i8> %a, i8 %b) {
 ; CHECK-LABEL: vwsll_vx_i8_v16i16_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v16i16_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vmv.v.x v11, a0
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v11
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vmv.v.x v10, a0
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v9, v10
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <16 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -638,18 +632,18 @@ define <16 x i16> @vwsll_vx_i8_v16i16_zext(<16 x i8> %a, i8 %b) {
 ; CHECK-LABEL: vwsll_vx_i8_v16i16_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v16i16_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <16 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -663,16 +657,16 @@ define <16 x i16> @vwsll_vi_v16i16(<16 x i8> %a) {
 ; CHECK-LABEL: vwsll_vi_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_v16i16:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vi v8, v9, 2
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <16 x i8> %a to <16 x i16>
   %z = shl <16 x i16> %x, splat (i16 2)
@@ -752,20 +746,20 @@ define <4 x i64> @vwsll_vx_i32_v4i64_v4i8_sext(<4 x i8> %a, i32 %b) {
 ; CHECK-LABEL: vwsll_vx_i32_v4i64_v4i8_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_v4i8_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v12, a0
+; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
 ; CHECK-ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vsext.vf2 v8, v12
+; CHECK-ZVBB-NEXT:    vsext.vf2 v8, v9
 ; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i32> poison, i32 %b, i32 0
@@ -780,18 +774,18 @@ define <4 x i64> @vwsll_vx_i32_v4i64_v4i8_zext(<4 x i8> %a, i32 %b) {
 ; CHECK-LABEL: vwsll_vx_i32_v4i64_v4i8_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_v4i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vzext.vf4 v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -843,8 +837,8 @@ define <4 x i64> @vwsll_vx_i16_v4i64_v4i8_zext(<4 x i8> %a, i16 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_v4i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vzext.vf4 v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -896,8 +890,8 @@ define <4 x i64> @vwsll_vx_i8_v4i64_v4i8_zext(<4 x i8> %a, i8 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_v4i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vzext.vf4 v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <4 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer
@@ -918,8 +912,8 @@ define <4 x i64> @vwsll_vi_v4i64_v4i8(<4 x i8> %a) {
 ; CHECK-ZVBB-LABEL: vwsll_vi_v4i64_v4i8:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT:    vzext.vf4 v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vi v8, v9, 2
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <4 x i8> %a to <4 x i64>
   %z = shl <4 x i64> %x, splat (i64 2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
index eafea7292a54b..20e10f121a613 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
@@ -41,13 +41,12 @@ define <8 x i64> @vwsubu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
 define <8 x i64> @vwsubu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; CHECK-LABEL: vwsubu_vv_mask_v8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
 ; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmslt.vx v0, v8, a0
-; CHECK-NEXT:    vmv.v.i v10, 0
-; CHECK-NEXT:    vmerge.vvm v14, v10, v8, v0
-; CHECK-NEXT:    vwsubu.vv v8, v12, v14
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vwsubu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
     %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
     %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
@@ -64,8 +63,8 @@ define <8 x i64> @vwsub_wv_mask_v8i32_nonzero(<8 x i32> %x, <8 x i64> %y) {
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmslt.vx v0, v8, a0
 ; CHECK-NEXT:    vmv.v.i v10, 1
-; CHECK-NEXT:    vmerge.vvm v16, v10, v8, v0
-; CHECK-NEXT:    vwsub.wv v8, v12, v16
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vwsub.wv v8, v12, v10
 ; CHECK-NEXT:    ret
     %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
     %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
index 783de24100613..a03d657ae89bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
@@ -102,9 +102,9 @@ define <16 x i16> @vwsub_v16i16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwsub_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vle8.v v11, (a1)
-; CHECK-NEXT:    vwsub.vv v8, v10, v11
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vwsub.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = load <16 x i8>, ptr %y
@@ -118,9 +118,9 @@ define <8 x i32> @vwsub_v8i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwsub_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vle16.v v11, (a1)
-; CHECK-NEXT:    vwsub.vv v8, v10, v11
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vwsub.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = load <8 x i16>, ptr %y
@@ -134,9 +134,9 @@ define <4 x i64> @vwsub_v4i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwsub_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vle32.v v11, (a1)
-; CHECK-NEXT:    vwsub.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vwsub.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i32>, ptr %y
@@ -151,9 +151,9 @@ define <32 x i16> @vwsub_v32i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vle8.v v14, (a1)
-; CHECK-NEXT:    vwsub.vv v8, v12, v14
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vle8.v v12, (a1)
+; CHECK-NEXT:    vwsub.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
@@ -167,9 +167,9 @@ define <16 x i32> @vwsub_v16i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwsub_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
-; CHECK-NEXT:    vwsub.vv v8, v12, v14
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v12, (a1)
+; CHECK-NEXT:    vwsub.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
@@ -183,9 +183,9 @@ define <8 x  i64> @vwsub_v8i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwsub_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vle32.v v14, (a1)
-; CHECK-NEXT:    vwsub.vv v8, v12, v14
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vle32.v v12, (a1)
+; CHECK-NEXT:    vwsub.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <8 x  i32>, ptr %x
   %b = load <8 x  i32>, ptr %y
@@ -200,9 +200,9 @@ define <64 x i16> @vwsub_v64i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vle8.v v20, (a1)
-; CHECK-NEXT:    vwsub.vv v8, v16, v20
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vle8.v v16, (a1)
+; CHECK-NEXT:    vwsub.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = load <64 x i8>, ptr %y
@@ -217,9 +217,9 @@ define <32 x i32> @vwsub_v32i32(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vle16.v v20, (a1)
-; CHECK-NEXT:    vwsub.vv v8, v16, v20
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v16, (a1)
+; CHECK-NEXT:    vwsub.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = load <32 x i16>, ptr %y
@@ -233,9 +233,9 @@ define <16 x i64> @vwsub_v16i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwsub_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vle32.v v20, (a1)
-; CHECK-NEXT:    vwsub.vv v8, v16, v20
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    vwsub.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = load <16 x i32>, ptr %y
@@ -388,9 +388,9 @@ define <4 x i64> @vwsub_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vsext.vf4 v11, v8
-; CHECK-NEXT:    vwsub.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vwsub.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i8>, ptr %y
@@ -500,8 +500,8 @@ define <16 x i16> @vwsub_vx_v16i16(ptr %x, i8 %y) {
 ; CHECK-LABEL: vwsub_vx_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vwsub.vx v8, v10, a1
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vwsub.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = insertelement <16 x i8> poison, i8 %y, i32 0
@@ -516,8 +516,8 @@ define <8 x i32> @vwsub_vx_v8i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwsub_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vwsub.vx v8, v10, a1
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vwsub.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = insertelement <8 x i16> poison, i16 %y, i32 0
@@ -532,8 +532,8 @@ define <4 x i64> @vwsub_vx_v4i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwsub_vx_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vwsub.vx v8, v10, a1
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vwsub.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = insertelement <4 x i32> poison, i32 %y, i64 0
@@ -549,8 +549,8 @@ define <32 x i16> @vwsub_vx_v32i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vwsub.vx v8, v12, a1
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vwsub.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -565,8 +565,8 @@ define <16 x i32> @vwsub_vx_v16i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwsub_vx_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vwsub.vx v8, v12, a1
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vwsub.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = insertelement <16 x i16> poison, i16 %y, i32 0
@@ -581,8 +581,8 @@ define <8 x i64> @vwsub_vx_v8i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwsub_vx_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vwsub.vx v8, v12, a1
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vwsub.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = insertelement <8 x i32> poison, i32 %y, i64 0
@@ -598,8 +598,8 @@ define <64 x i16> @vwsub_vx_v64i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vwsub.vx v8, v16, a1
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vwsub.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -615,8 +615,8 @@ define <32 x i32> @vwsub_vx_v32i32(ptr %x, i16 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vwsub.vx v8, v16, a1
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vwsub.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = insertelement <32 x i16> poison, i16 %y, i32 0
@@ -631,8 +631,8 @@ define <16 x i64> @vwsub_vx_v16i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwsub_vx_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vwsub.vx v8, v16, a1
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vwsub.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = insertelement <16 x i32> poison, i32 %y, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
index bfdda47cc819e..483da358b6cf1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
@@ -102,9 +102,9 @@ define <16 x i16> @vwsubu_v16i16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwsubu_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vle8.v v11, (a1)
-; CHECK-NEXT:    vwsubu.vv v8, v10, v11
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vwsubu.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = load <16 x i8>, ptr %y
@@ -118,9 +118,9 @@ define <8 x i32> @vwsubu_v8i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwsubu_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vle16.v v11, (a1)
-; CHECK-NEXT:    vwsubu.vv v8, v10, v11
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vwsubu.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = load <8 x i16>, ptr %y
@@ -134,9 +134,9 @@ define <4 x i64> @vwsubu_v4i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwsubu_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vle32.v v11, (a1)
-; CHECK-NEXT:    vwsubu.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vwsubu.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i32>, ptr %y
@@ -151,9 +151,9 @@ define <32 x i16> @vwsubu_v32i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vle8.v v14, (a1)
-; CHECK-NEXT:    vwsubu.vv v8, v12, v14
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vle8.v v12, (a1)
+; CHECK-NEXT:    vwsubu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
@@ -167,9 +167,9 @@ define <16 x i32> @vwsubu_v16i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwsubu_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vle16.v v14, (a1)
-; CHECK-NEXT:    vwsubu.vv v8, v12, v14
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v12, (a1)
+; CHECK-NEXT:    vwsubu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
@@ -183,9 +183,9 @@ define <8 x  i64> @vwsubu_v8i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwsubu_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vle32.v v14, (a1)
-; CHECK-NEXT:    vwsubu.vv v8, v12, v14
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vle32.v v12, (a1)
+; CHECK-NEXT:    vwsubu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %a = load <8 x  i32>, ptr %x
   %b = load <8 x  i32>, ptr %y
@@ -200,9 +200,9 @@ define <64 x i16> @vwsubu_v64i16(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vle8.v v20, (a1)
-; CHECK-NEXT:    vwsubu.vv v8, v16, v20
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vle8.v v16, (a1)
+; CHECK-NEXT:    vwsubu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = load <64 x i8>, ptr %y
@@ -217,9 +217,9 @@ define <32 x i32> @vwsubu_v32i32(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vle16.v v20, (a1)
-; CHECK-NEXT:    vwsubu.vv v8, v16, v20
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v16, (a1)
+; CHECK-NEXT:    vwsubu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = load <32 x i16>, ptr %y
@@ -233,9 +233,9 @@ define <16 x i64> @vwsubu_v16i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwsubu_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vle32.v v20, (a1)
-; CHECK-NEXT:    vwsubu.vv v8, v16, v20
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    vwsubu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = load <16 x i32>, ptr %y
@@ -388,9 +388,9 @@ define <4 x i64> @vwsubu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vzext.vf4 v11, v8
-; CHECK-NEXT:    vwsubu.vv v8, v10, v11
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vwsubu.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = load <4 x i8>, ptr %y
@@ -500,8 +500,8 @@ define <16 x i16> @vwsubu_vx_v16i16(ptr %x, i8 %y) {
 ; CHECK-LABEL: vwsubu_vx_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vwsubu.vx v8, v10, a1
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vwsubu.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = insertelement <16 x i8> poison, i8 %y, i32 0
@@ -516,8 +516,8 @@ define <8 x i32> @vwsubu_vx_v8i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwsubu_vx_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vwsubu.vx v8, v10, a1
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vwsubu.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = insertelement <8 x i16> poison, i16 %y, i32 0
@@ -532,8 +532,8 @@ define <4 x i64> @vwsubu_vx_v4i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwsubu_vx_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vwsubu.vx v8, v10, a1
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vwsubu.vx v8, v9, a1
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = insertelement <4 x i32> poison, i32 %y, i64 0
@@ -549,8 +549,8 @@ define <32 x i16> @vwsubu_vx_v32i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vwsubu.vx v8, v12, a1
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vwsubu.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = insertelement <32 x i8> poison, i8 %y, i32 0
@@ -565,8 +565,8 @@ define <16 x i32> @vwsubu_vx_v16i32(ptr %x, i16 %y) {
 ; CHECK-LABEL: vwsubu_vx_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vwsubu.vx v8, v12, a1
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vwsubu.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = insertelement <16 x i16> poison, i16 %y, i32 0
@@ -581,8 +581,8 @@ define <8 x i64> @vwsubu_vx_v8i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwsubu_vx_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vwsubu.vx v8, v12, a1
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vwsubu.vx v8, v10, a1
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = insertelement <8 x i32> poison, i32 %y, i64 0
@@ -598,8 +598,8 @@ define <64 x i16> @vwsubu_vx_v64i16(ptr %x, i8 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vwsubu.vx v8, v16, a1
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vwsubu.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = insertelement <64 x i8> poison, i8 %y, i32 0
@@ -615,8 +615,8 @@ define <32 x i32> @vwsubu_vx_v32i32(ptr %x, i16 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vwsubu.vx v8, v16, a1
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vwsubu.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = insertelement <32 x i16> poison, i16 %y, i32 0
@@ -631,8 +631,8 @@ define <16 x i64> @vwsubu_vx_v16i64(ptr %x, i32 %y) {
 ; CHECK-LABEL: vwsubu_vx_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vwsubu.vx v8, v16, a1
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vwsubu.vx v8, v12, a1
 ; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = insertelement <16 x i32> poison, i32 %y, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll
index 8259336e8668c..00099133bda6f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll
@@ -128,8 +128,8 @@ define <4 x i64> @vzext_v4i64_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl
 ; CHECK-LABEL: vzext_v4i64_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vzext.vf2 v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i64> %v
@@ -139,8 +139,8 @@ define <4 x i64> @vzext_v4i64_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vzext_v4i64_v4i32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %v = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
index a189711d11471..ff957e37af7b5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
@@ -8,9 +8,9 @@ define i32 @vqdot_vv(<16 x i8> %a, <16 x i8> %b) {
 ; NODOT-LABEL: vqdot_vv:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vsext.vf2 v14, v9
-; NODOT-NEXT:    vwmul.vv v8, v12, v14
+; NODOT-NEXT:    vsext.vf2 v10, v8
+; NODOT-NEXT:    vsext.vf2 v12, v9
+; NODOT-NEXT:    vwmul.vv v8, v10, v12
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; NODOT-NEXT:    vmv.s.x v12, zero
 ; NODOT-NEXT:    vredsum.vs v8, v8, v12
@@ -38,9 +38,9 @@ define i32 @vqdot_vx_constant(<16 x i8> %a) {
 ; CHECK-LABEL: vqdot_vx_constant:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v10, v8
 ; CHECK-NEXT:    li a0, 23
-; CHECK-NEXT:    vwmul.vx v8, v12, a0
+; CHECK-NEXT:    vwmul.vx v8, v10, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.s.x v12, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v12
@@ -57,9 +57,9 @@ define i32 @vqdot_vx_constant_swapped(<16 x i8> %a) {
 ; CHECK-LABEL: vqdot_vx_constant_swapped:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v10, v8
 ; CHECK-NEXT:    li a0, 23
-; CHECK-NEXT:    vwmul.vx v8, v12, a0
+; CHECK-NEXT:    vwmul.vx v8, v10, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.s.x v12, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v12
@@ -106,9 +106,9 @@ define i32 @vqdotu_vx_constant(<16 x i8> %a) {
 ; CHECK-LABEL: vqdotu_vx_constant:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    li a0, 123
-; CHECK-NEXT:    vwmulu.vx v8, v12, a0
+; CHECK-NEXT:    vwmulu.vx v8, v10, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.s.x v12, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v12
@@ -125,9 +125,9 @@ define i32 @vqdotsu_vv(<16 x i8> %a, <16 x i8> %b) {
 ; NODOT-LABEL: vqdotsu_vv:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vzext.vf2 v14, v9
-; NODOT-NEXT:    vwmulsu.vv v8, v12, v14
+; NODOT-NEXT:    vsext.vf2 v10, v8
+; NODOT-NEXT:    vzext.vf2 v12, v9
+; NODOT-NEXT:    vwmulsu.vv v8, v10, v12
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; NODOT-NEXT:    vmv.s.x v12, zero
 ; NODOT-NEXT:    vredsum.vs v8, v8, v12
@@ -155,9 +155,9 @@ define i32 @vqdotsu_vv_swapped(<16 x i8> %a, <16 x i8> %b) {
 ; NODOT-LABEL: vqdotsu_vv_swapped:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vzext.vf2 v14, v9
-; NODOT-NEXT:    vwmulsu.vv v8, v12, v14
+; NODOT-NEXT:    vsext.vf2 v10, v8
+; NODOT-NEXT:    vzext.vf2 v12, v9
+; NODOT-NEXT:    vwmulsu.vv v8, v10, v12
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; NODOT-NEXT:    vmv.s.x v12, zero
 ; NODOT-NEXT:    vredsum.vs v8, v8, v12
@@ -185,9 +185,9 @@ define i32 @vdotqsu_vx_constant(<16 x i8> %a) {
 ; CHECK-LABEL: vdotqsu_vx_constant:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v10, v8
 ; CHECK-NEXT:    li a0, 123
-; CHECK-NEXT:    vwmul.vx v8, v12, a0
+; CHECK-NEXT:    vwmul.vx v8, v10, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.s.x v12, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v12
@@ -204,10 +204,10 @@ define i32 @vdotqus_vx_constant(<16 x i8> %a) {
 ; CHECK-LABEL: vdotqus_vx_constant:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v10, v8
 ; CHECK-NEXT:    li a0, -23
-; CHECK-NEXT:    vmv.v.x v14, a0
-; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.s.x v12, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v12
@@ -279,8 +279,8 @@ define i32 @vqdot_vv_accum(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; NODOT-NEXT:    vsext.vf2 v10, v8
-; NODOT-NEXT:    vsext.vf2 v16, v9
-; NODOT-NEXT:    vwmacc.vv v12, v10, v16
+; NODOT-NEXT:    vsext.vf2 v8, v9
+; NODOT-NEXT:    vwmacc.vv v12, v10, v8
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; NODOT-NEXT:    vmv.s.x v8, zero
 ; NODOT-NEXT:    vredsum.vs v8, v12, v8
@@ -347,8 +347,8 @@ define i32 @vqdotsu_vv_accum(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; NODOT-NEXT:    vsext.vf2 v10, v8
-; NODOT-NEXT:    vzext.vf2 v16, v9
-; NODOT-NEXT:    vwmaccsu.vv v12, v10, v16
+; NODOT-NEXT:    vzext.vf2 v8, v9
+; NODOT-NEXT:    vwmaccsu.vv v12, v10, v8
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; NODOT-NEXT:    vmv.s.x v8, zero
 ; NODOT-NEXT:    vredsum.vs v8, v12, v8
@@ -380,9 +380,9 @@ define i32 @vqdot_vv_scalar_add(<16 x i8> %a, <16 x i8> %b, i32 %x) {
 ; NODOT-LABEL: vqdot_vv_scalar_add:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vsext.vf2 v14, v9
-; NODOT-NEXT:    vwmul.vv v8, v12, v14
+; NODOT-NEXT:    vsext.vf2 v10, v8
+; NODOT-NEXT:    vsext.vf2 v12, v9
+; NODOT-NEXT:    vwmul.vv v8, v10, v12
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; NODOT-NEXT:    vmv.s.x v12, a0
 ; NODOT-NEXT:    vredsum.vs v8, v8, v12
@@ -442,9 +442,9 @@ define i32 @vqdotsu_vv_scalar_add(<16 x i8> %a, <16 x i8> %b, i32 %x) {
 ; NODOT-LABEL: vqdotsu_vv_scalar_add:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vzext.vf2 v14, v9
-; NODOT-NEXT:    vwmulsu.vv v8, v12, v14
+; NODOT-NEXT:    vsext.vf2 v10, v8
+; NODOT-NEXT:    vzext.vf2 v12, v9
+; NODOT-NEXT:    vwmulsu.vv v8, v10, v12
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; NODOT-NEXT:    vmv.s.x v12, a0
 ; NODOT-NEXT:    vredsum.vs v8, v8, v12
@@ -784,9 +784,9 @@ define <2 x i32> @vqdot_vv_partial_reduce_v2i32_v64i8(<64 x i8> %a, <64 x i8> %b
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x05, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 5 * vlenb
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v24, v8
-; CHECK-NEXT:    vsext.vf2 v28, v12
-; CHECK-NEXT:    vwmul.vv v16, v24, v28
+; CHECK-NEXT:    vsext.vf2 v20, v8
+; CHECK-NEXT:    vsext.vf2 v24, v12
+; CHECK-NEXT:    vwmul.vv v16, v20, v24
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m8, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v24, v16, 28
 ; CHECK-NEXT:    vslidedown.vi v0, v16, 26
@@ -953,9 +953,9 @@ define <4 x i32> @vqdot_vv_partial_reduce_v4i32_v16i8(<16 x i8> %a, <16 x i8> %b
 ; NODOT-LABEL: vqdot_vv_partial_reduce_v4i32_v16i8:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vsext.vf2 v14, v9
-; NODOT-NEXT:    vwmul.vv v8, v12, v14
+; NODOT-NEXT:    vsext.vf2 v10, v8
+; NODOT-NEXT:    vsext.vf2 v12, v9
+; NODOT-NEXT:    vwmul.vv v8, v10, v12
 ; NODOT-NEXT:    vsetivli zero, 4, e32, m4, ta, ma
 ; NODOT-NEXT:    vslidedown.vi v12, v8, 12
 ; NODOT-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
@@ -1031,9 +1031,9 @@ define <4 x i32> @vqdot_vv_partial_reduce_m1_accum(<16 x i8> %a, <16 x i8> %b, <
 ; NODOT-LABEL: vqdot_vv_partial_reduce_m1_accum:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v16, v8
-; NODOT-NEXT:    vsext.vf2 v18, v9
-; NODOT-NEXT:    vwmul.vv v12, v16, v18
+; NODOT-NEXT:    vsext.vf2 v14, v8
+; NODOT-NEXT:    vsext.vf2 v8, v9
+; NODOT-NEXT:    vwmul.vv v12, v14, v8
 ; NODOT-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; NODOT-NEXT:    vadd.vv v16, v10, v12
 ; NODOT-NEXT:    vsetivli zero, 4, e32, m4, ta, ma
@@ -1067,9 +1067,9 @@ define <16 x i32> @vqdot_vv_partial_reduce3(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: vqdot_vv_partial_reduce3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vsext.vf2 v14, v9
-; CHECK-NEXT:    vwmul.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v12
 ; CHECK-NEXT:    ret
 entry:
   %a.sext = sext <16 x i8> %a to <16 x i32>
@@ -1122,8 +1122,8 @@ define <64 x i32> @vqdotsu_vv_partial_v64i32_v256i8(<256 x i8> %a, <256 x i8> %b
 ; NODOT-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 ; NODOT-NEXT:    vslidedown.vx v12, v0, a0
 ; NODOT-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; NODOT-NEXT:    vzext.vf2 v4, v12
-; NODOT-NEXT:    vwmulsu.vv v24, v8, v4
+; NODOT-NEXT:    vzext.vf2 v28, v12
+; NODOT-NEXT:    vwmulsu.vv v24, v8, v28
 ; NODOT-NEXT:    csrr a3, vlenb
 ; NODOT-NEXT:    slli a3, a3, 5
 ; NODOT-NEXT:    add a3, sp, a3
@@ -1141,7 +1141,10 @@ define <64 x i32> @vqdotsu_vv_partial_v64i32_v256i8(<256 x i8> %a, <256 x i8> %b
 ; NODOT-NEXT:    vzext.vf2 v0, v8
 ; NODOT-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
 ; NODOT-NEXT:    vle8.v v8, (a1)
-; NODOT-NEXT:    addi a1, sp, 16
+; NODOT-NEXT:    csrr a1, vlenb
+; NODOT-NEXT:    slli a1, a1, 3
+; NODOT-NEXT:    add a1, sp, a1
+; NODOT-NEXT:    addi a1, a1, 16
 ; NODOT-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; NODOT-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; NODOT-NEXT:    vwmaccsu.vv v24, v4, v0
@@ -1158,10 +1161,15 @@ define <64 x i32> @vqdotsu_vv_partial_v64i32_v256i8(<256 x i8> %a, <256 x i8> %b
 ; NODOT-NEXT:    slli a1, a1, 4
 ; NODOT-NEXT:    add a1, sp, a1
 ; NODOT-NEXT:    addi a1, a1, 16
-; NODOT-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
-; NODOT-NEXT:    vsext.vf2 v12, v16
+; NODOT-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; NODOT-NEXT:    vsext.vf2 v16, v8
+; NODOT-NEXT:    csrr a1, vlenb
+; NODOT-NEXT:    slli a1, a1, 3
+; NODOT-NEXT:    add a1, sp, a1
+; NODOT-NEXT:    addi a1, a1, 16
+; NODOT-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; NODOT-NEXT:    vzext.vf2 v20, v8
-; NODOT-NEXT:    vwmaccsu.vv v0, v12, v20
+; NODOT-NEXT:    vwmaccsu.vv v0, v16, v20
 ; NODOT-NEXT:    li a1, 64
 ; NODOT-NEXT:    csrr a2, vlenb
 ; NODOT-NEXT:    slli a2, a2, 5
@@ -1170,10 +1178,7 @@ define <64 x i32> @vqdotsu_vv_partial_v64i32_v256i8(<256 x i8> %a, <256 x i8> %b
 ; NODOT-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
 ; NODOT-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 ; NODOT-NEXT:    vslidedown.vx v8, v16, a1
-; NODOT-NEXT:    csrr a2, vlenb
-; NODOT-NEXT:    slli a2, a2, 5
-; NODOT-NEXT:    add a2, sp, a2
-; NODOT-NEXT:    addi a2, a2, 16
+; NODOT-NEXT:    addi a2, sp, 16
 ; NODOT-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; NODOT-NEXT:    csrr a2, vlenb
 ; NODOT-NEXT:    slli a2, a2, 3
@@ -1183,21 +1188,16 @@ define <64 x i32> @vqdotsu_vv_partial_v64i32_v256i8(<256 x i8> %a, <256 x i8> %b
 ; NODOT-NEXT:    add a2, sp, a2
 ; NODOT-NEXT:    addi a2, a2, 16
 ; NODOT-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; NODOT-NEXT:    vslidedown.vx v8, v16, a1
-; NODOT-NEXT:    csrr a2, vlenb
-; NODOT-NEXT:    slli a2, a2, 3
-; NODOT-NEXT:    add a2, sp, a2
-; NODOT-NEXT:    addi a2, a2, 16
-; NODOT-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; NODOT-NEXT:    vslidedown.vx v16, v16, a1
 ; NODOT-NEXT:    csrr a2, vlenb
 ; NODOT-NEXT:    slli a2, a2, 5
 ; NODOT-NEXT:    add a2, sp, a2
 ; NODOT-NEXT:    addi a2, a2, 16
-; NODOT-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; NODOT-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; NODOT-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; NODOT-NEXT:    vsext.vf2 v16, v8
 ; NODOT-NEXT:    csrr a2, vlenb
-; NODOT-NEXT:    slli a2, a2, 3
+; NODOT-NEXT:    slli a2, a2, 5
 ; NODOT-NEXT:    add a2, sp, a2
 ; NODOT-NEXT:    addi a2, a2, 16
 ; NODOT-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
@@ -1207,10 +1207,13 @@ define <64 x i32> @vqdotsu_vv_partial_v64i32_v256i8(<256 x i8> %a, <256 x i8> %b
 ; NODOT-NEXT:    slli a2, a2, 4
 ; NODOT-NEXT:    add a2, sp, a2
 ; NODOT-NEXT:    addi a2, a2, 16
-; NODOT-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; NODOT-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; NODOT-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; NODOT-NEXT:    vslidedown.vx v16, v16, a1
-; NODOT-NEXT:    addi a2, sp, 16
+; NODOT-NEXT:    vslidedown.vx v16, v8, a1
+; NODOT-NEXT:    csrr a2, vlenb
+; NODOT-NEXT:    slli a2, a2, 3
+; NODOT-NEXT:    add a2, sp, a2
+; NODOT-NEXT:    addi a2, a2, 16
 ; NODOT-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; NODOT-NEXT:    vslidedown.vx v8, v8, a1
 ; NODOT-NEXT:    csrr a1, vlenb
@@ -1243,15 +1246,12 @@ define <64 x i32> @vqdotsu_vv_partial_v64i32_v256i8(<256 x i8> %a, <256 x i8> %b
 ; NODOT-NEXT:    addi a1, a1, 16
 ; NODOT-NEXT:    vl4r.v v8, (a1) # vscale x 32-byte Folded Reload
 ; NODOT-NEXT:    vwmaccsu.vv v0, v8, v20
-; NODOT-NEXT:    csrr a1, vlenb
-; NODOT-NEXT:    slli a1, a1, 5
-; NODOT-NEXT:    add a1, sp, a1
-; NODOT-NEXT:    addi a1, a1, 16
+; NODOT-NEXT:    addi a1, sp, 16
 ; NODOT-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; NODOT-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 ; NODOT-NEXT:    vslidedown.vx v20, v8, a0
 ; NODOT-NEXT:    csrr a1, vlenb
-; NODOT-NEXT:    slli a1, a1, 3
+; NODOT-NEXT:    slli a1, a1, 5
 ; NODOT-NEXT:    add a1, sp, a1
 ; NODOT-NEXT:    addi a1, a1, 16
 ; NODOT-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
index 4512d809995a4..fc6be944e2473 100644
--- a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
@@ -23,9 +23,9 @@ define <vscale x 1 x i8> @trunc_nxv1f32_to_si8(<vscale x 1 x float> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f32_to_si8:
@@ -40,9 +40,9 @@ define <vscale x 1 x i8> @trunc_nxv1f32_to_si8(<vscale x 1 x float> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
   %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i8>
@@ -62,9 +62,9 @@ define <vscale x 1 x i8> @trunc_nxv1f32_to_ui8(<vscale x 1 x float> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f32_to_ui8:
@@ -79,9 +79,9 @@ define <vscale x 1 x i8> @trunc_nxv1f32_to_ui8(<vscale x 1 x float> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
   %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i8>
@@ -92,15 +92,13 @@ define <vscale x 1 x i16> @trunc_nxv1f32_to_si16(<vscale x 1 x float> %x) {
 ; RV32-LABEL: trunc_nxv1f32_to_si16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f32_to_si16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
   %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i16>
@@ -111,15 +109,13 @@ define <vscale x 1 x i16> @trunc_nxv1f32_to_ui16(<vscale x 1 x float> %x) {
 ; RV32-LABEL: trunc_nxv1f32_to_ui16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f32_to_ui16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
   %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i16>
@@ -217,9 +213,9 @@ define <vscale x 4 x i8> @trunc_nxv4f32_to_si8(<vscale x 4 x float> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v10, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f32_to_si8:
@@ -234,9 +230,9 @@ define <vscale x 4 x i8> @trunc_nxv4f32_to_si8(<vscale x 4 x float> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v10, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
   %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i8>
@@ -256,9 +252,9 @@ define <vscale x 4 x i8> @trunc_nxv4f32_to_ui8(<vscale x 4 x float> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v10, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f32_to_ui8:
@@ -273,9 +269,9 @@ define <vscale x 4 x i8> @trunc_nxv4f32_to_ui8(<vscale x 4 x float> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v10, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
   %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i8>
@@ -286,15 +282,13 @@ define <vscale x 4 x i16> @trunc_nxv4f32_to_si16(<vscale x 4 x float> %x) {
 ; RV32-LABEL: trunc_nxv4f32_to_si16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f32_to_si16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
   %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i16>
@@ -305,15 +299,13 @@ define <vscale x 4 x i16> @trunc_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
 ; RV32-LABEL: trunc_nxv4f32_to_ui16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f32_to_ui16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
   %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i16>
@@ -358,15 +350,15 @@ define <vscale x 4 x i64> @trunc_nxv4f32_to_si64(<vscale x 4 x float> %x) {
 ; RV32-LABEL: trunc_nxv4f32_to_si64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f32_to_si64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmv2r.v v12, v8
-; RV64-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; RV64-NEXT:    vmv2r.v v10, v8
+; RV64-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
   %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
@@ -377,15 +369,15 @@ define <vscale x 4 x i64> @trunc_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
 ; RV32-LABEL: trunc_nxv4f32_to_ui64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f32_to_ui64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmv2r.v v12, v8
-; RV64-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; RV64-NEXT:    vmv2r.v v10, v8
+; RV64-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
   %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
@@ -413,9 +405,9 @@ define <vscale x 1 x i8> @ceil_nxv1f32_to_si8(<vscale x 1 x float> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f32_to_si8:
@@ -432,9 +424,9 @@ define <vscale x 1 x i8> @ceil_nxv1f32_to_si8(<vscale x 1 x float> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
   %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i8>
@@ -456,9 +448,9 @@ define <vscale x 1 x i8> @ceil_nxv1f32_to_ui8(<vscale x 1 x float> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v9, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f32_to_ui8:
@@ -475,9 +467,9 @@ define <vscale x 1 x i8> @ceil_nxv1f32_to_ui8(<vscale x 1 x float> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v9, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
   %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i8>
@@ -489,18 +481,16 @@ define <vscale x 1 x i16> @ceil_nxv1f32_to_si16(<vscale x 1 x float> %x) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    fsrmi a0, 3
 ; RV32-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v9, v8
+; RV32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f32_to_si16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfncvt.x.f.w v9, v8
+; RV64-NEXT:    vfncvt.x.f.w v8, v8
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
   %b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i16>
@@ -512,18 +502,16 @@ define <vscale x 1 x i16> @ceil_nxv1f32_to_ui16(<vscale x 1 x float> %x) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    fsrmi a0, 3
 ; RV32-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfncvt.xu.f.w v9, v8
+; RV32-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f32_to_ui16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfncvt.xu.f.w v9, v8
+; RV64-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
   %b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i16>
@@ -639,9 +627,9 @@ define <vscale x 4 x i8> @ceil_nxv4f32_to_si8(<vscale x 4 x float> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v10, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f32_to_si8:
@@ -658,9 +646,9 @@ define <vscale x 4 x i8> @ceil_nxv4f32_to_si8(<vscale x 4 x float> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v10, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
   %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i8>
@@ -682,9 +670,9 @@ define <vscale x 4 x i8> @ceil_nxv4f32_to_ui8(<vscale x 4 x float> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v10, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f32_to_ui8:
@@ -701,9 +689,9 @@ define <vscale x 4 x i8> @ceil_nxv4f32_to_ui8(<vscale x 4 x float> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v10, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
   %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i8>
@@ -715,18 +703,16 @@ define <vscale x 4 x i16> @ceil_nxv4f32_to_si16(<vscale x 4 x float> %x) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    fsrmi a0, 3
 ; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v10, v8
+; RV32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f32_to_si16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.x.f.w v10, v8
+; RV64-NEXT:    vfncvt.x.f.w v8, v8
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
   %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i16>
@@ -738,18 +724,16 @@ define <vscale x 4 x i16> @ceil_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    fsrmi a0, 3
 ; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.xu.f.w v10, v8
+; RV32-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f32_to_ui16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    fsrmi a0, 3
 ; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.xu.f.w v10, v8
+; RV64-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
   %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i16>
@@ -802,18 +786,18 @@ define <vscale x 4 x i64> @ceil_nxv4f32_to_si64(<vscale x 4 x float> %x) {
 ; RV32-LABEL: ceil_nxv4f32_to_si64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
+; RV32-NEXT:    vmv2r.v v10, v8
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfwcvt.x.f.v v8, v12
+; RV32-NEXT:    vfwcvt.x.f.v v8, v10
 ; RV32-NEXT:    fsrm a0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f32_to_si64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmv2r.v v12, v8
+; RV64-NEXT:    vmv2r.v v10, v8
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfwcvt.x.f.v v8, v12
+; RV64-NEXT:    vfwcvt.x.f.v v8, v10
 ; RV64-NEXT:    fsrm a0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
@@ -825,18 +809,18 @@ define <vscale x 4 x i64> @ceil_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
 ; RV32-LABEL: ceil_nxv4f32_to_ui64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
+; RV32-NEXT:    vmv2r.v v10, v8
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfwcvt.xu.f.v v8, v12
+; RV32-NEXT:    vfwcvt.xu.f.v v8, v10
 ; RV32-NEXT:    fsrm a0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f32_to_ui64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmv2r.v v12, v8
+; RV64-NEXT:    vmv2r.v v10, v8
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfwcvt.xu.f.v v8, v12
+; RV64-NEXT:    vfwcvt.xu.f.v v8, v10
 ; RV64-NEXT:    fsrm a0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
@@ -863,9 +847,9 @@ define <vscale x 4 x i8> @rint_nxv4f32_to_si8(<vscale x 4 x float> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; RV32-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v10, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv4f32_to_si8:
@@ -880,9 +864,9 @@ define <vscale x 4 x i8> @rint_nxv4f32_to_si8(<vscale x 4 x float> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; RV64-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v10, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
   %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i8>
@@ -902,9 +886,9 @@ define <vscale x 4 x i8> @rint_nxv4f32_to_ui8(<vscale x 4 x float> %x) {
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; RV32-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v10, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv4f32_to_ui8:
@@ -919,9 +903,9 @@ define <vscale x 4 x i8> @rint_nxv4f32_to_ui8(<vscale x 4 x float> %x) {
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; RV64-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v10, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
   %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i8>
@@ -932,15 +916,13 @@ define <vscale x 4 x i16> @rint_nxv4f32_to_si16(<vscale x 4 x float> %x) {
 ; RV32-LABEL: rint_nxv4f32_to_si16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v10, v8
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv4f32_to_si16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.x.f.w v10, v8
-; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    vfncvt.x.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
   %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i16>
@@ -951,15 +933,13 @@ define <vscale x 4 x i16> @rint_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
 ; RV32-LABEL: rint_nxv4f32_to_ui16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.xu.f.w v10, v8
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv4f32_to_ui16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.xu.f.w v10, v8
-; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    vfncvt.xu.f.w v8, v8
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
   %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i16>
@@ -1004,15 +984,15 @@ define <vscale x 4 x i64> @rint_nxv4f32_to_si64(<vscale x 4 x float> %x) {
 ; RV32-LABEL: rint_nxv4f32_to_si64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vfwcvt.x.f.v v8, v12
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vfwcvt.x.f.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv4f32_to_si64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmv2r.v v12, v8
-; RV64-NEXT:    vfwcvt.x.f.v v8, v12
+; RV64-NEXT:    vmv2r.v v10, v8
+; RV64-NEXT:    vfwcvt.x.f.v v8, v10
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
   %b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
@@ -1023,15 +1003,15 @@ define <vscale x 4 x i64> @rint_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
 ; RV32-LABEL: rint_nxv4f32_to_ui64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vfwcvt.xu.f.v v8, v12
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vfwcvt.xu.f.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: rint_nxv4f32_to_ui64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmv2r.v v12, v8
-; RV64-NEXT:    vfwcvt.xu.f.v v8, v12
+; RV64-NEXT:    vmv2r.v v10, v8
+; RV64-NEXT:    vfwcvt.xu.f.v v8, v10
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %x)
   %b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
index 6ebb03ff0297e..128a140458e2d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
@@ -349,6 +349,12 @@ define <vscale x 32 x bfloat> @vp_floor_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 define <vscale x 32 x bfloat> @vp_floor_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv32bf16_unmasked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmset.m v16
@@ -358,35 +364,39 @@ define <vscale x 32 x bfloat> @vp_floor_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    fmv.w.x fa5, a3
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v7, v16, a2
+; CHECK-NEXT:    vslidedown.vx v16, v16, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
-; CHECK-NEXT:    vmv1r.v v6, v7
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v24, v0.t
+; CHECK-NEXT:    vfabs.v v8, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a2, 2
-; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
 ; CHECK-NEXT:    bltu a0, a1, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB11_2:
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16
 ; CHECK-NEXT:    vmflt.vf v0, v24, fa5
@@ -398,6 +408,12 @@ define <vscale x 32 x bfloat> @vp_floor_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    .cfi_def_cfa sp, 16
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.vp.floor.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x bfloat> %v
@@ -665,21 +681,19 @@ declare <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half>, <vscale
 define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_floor_nxv8f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a0)
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 2
-; ZVFH-NEXT:    vmv1r.v v0, v10
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_floor_nxv8f16:
@@ -753,21 +767,19 @@ declare <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half>, <vsca
 define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_floor_nxv16f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a0)
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 2
-; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_floor_nxv16f16:
@@ -841,21 +853,19 @@ declare <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half>, <vsca
 define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_floor_nxv32f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 2
-; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_floor_nxv32f16:
@@ -937,6 +947,12 @@ define <vscale x 32 x half> @vp_floor_nxv32f16_unmasked(<vscale x 32 x half> %va
 ;
 ; ZVFHMIN-LABEL: vp_floor_nxv32f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a1
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v16
@@ -946,35 +962,39 @@ define <vscale x 32 x half> @vp_floor_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a3
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v7, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
-; ZVFHMIN-NEXT:    vmv1r.v v6, v7
+; ZVFHMIN-NEXT:    vmv1r.v v17, v16
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-NEXT:    addi a3, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v6, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a2, 2
-; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    fsrm a2
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB23_2:
+; ZVFHMIN-NEXT:    addi a1, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v24, v16
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
@@ -986,6 +1006,12 @@ define <vscale x 32 x half> @vp_floor_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x half> %v
@@ -1081,20 +1107,18 @@ define <vscale x 4 x float> @vp_floor_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: vp_floor_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1125,20 +1149,18 @@ define <vscale x 8 x float> @vp_floor_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: vp_floor_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1169,20 +1191,18 @@ define <vscale x 16 x float> @vp_floor_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: vp_floor_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -1254,21 +1274,19 @@ declare <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double>, <vsc
 define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv2f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI36_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -1298,21 +1316,19 @@ declare <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double>, <vsc
 define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -1342,21 +1358,19 @@ declare <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double>, <vsc
 define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv7f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI40_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -1386,21 +1400,19 @@ declare <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double>, <vsc
 define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI42_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 2
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1438,17 +1450,15 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    srli a3, a1, 3
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI44_0)(a2)
 ; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    vslidedown.vx v6, v0, a3
+; CHECK-NEXT:    vslidedown.vx v0, v0, a3
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a2, 2
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
 ; CHECK-NEXT:    fsrm a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
index 3faaf210086cb..00c581c5426c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
@@ -31,9 +31,9 @@ define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v8, v9
+; CHECK-NEXT:    vfmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.maximum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
   ret <vscale x 1 x bfloat> %v
@@ -54,9 +54,9 @@ define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v8, v9
+; CHECK-NEXT:    vfmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.maximum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
   ret <vscale x 2 x bfloat> %v
@@ -72,14 +72,14 @@ define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v10, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v12, v10, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
+; CHECK-NEXT:    vmerge.vvm v12, v10, v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.maximum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
   ret <vscale x 4 x bfloat> %v
@@ -95,14 +95,14 @@ define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v12, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v16, v12, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
+; CHECK-NEXT:    vmerge.vvm v16, v12, v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.maximum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %v
@@ -122,9 +122,9 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v16, v16, v8
+; CHECK-NEXT:    vfmax.vv v8, v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.maximum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
   ret <vscale x 16 x bfloat> %v
@@ -133,155 +133,66 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
 declare <vscale x 32 x bfloat> @llvm.maximum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>)
 
 define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) nounwind {
-; ZVFH-LABEL: vfmax_nxv32bf16_vv:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    addi sp, sp, -16
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    mv a1, a0
-; ZVFH-NEXT:    slli a0, a0, 1
-; ZVFH-NEXT:    add a0, a0, a1
-; ZVFH-NEXT:    sub sp, sp, a0
-; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv8r.v v24, v16
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; ZVFH-NEXT:    vmv8r.v v0, v8
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v24
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v0
-; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v3, v16, v16
-; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 4
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFH-NEXT:    vmv1r.v v0, v3
-; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
-; ZVFH-NEXT:    addi a0, sp, 16
-; ZVFH-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v12
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v4
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 4
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    addi a0, sp, 16
-; ZVFH-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFH-NEXT:    vfmax.vv v16, v0, v16
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 4
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v7, v24, v24
-; ZVFH-NEXT:    vmerge.vvm v16, v8, v24, v0
-; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    vmerge.vvm v8, v24, v8, v0
-; ZVFH-NEXT:    vfmax.vv v16, v8, v16
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 4
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v24
-; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    mv a1, a0
-; ZVFH-NEXT:    slli a0, a0, 1
-; ZVFH-NEXT:    add a0, a0, a1
-; ZVFH-NEXT:    add sp, sp, a0
-; ZVFH-NEXT:    addi sp, sp, 16
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: vfmax_nxv32bf16_vv:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv8r.v v24, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vmv8r.v v0, v8
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v24
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmfeq.vv v3, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vmv1r.v v0, v3
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v8, v0
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v12
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v4
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v16, v0, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmfeq.vv v7, v24, v24
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v24, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v8, v0
-; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v24
-; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    addi sp, sp, 16
-; ZVFHMIN-NEXT:    ret
+; CHECK-LABEL: vfmax_nxv32bf16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vmv8r.v v0, v16
+; CHECK-NEXT:    vmv8r.v v24, v8
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v0
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v3, v8, v8
+; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmv1r.v v0, v3
+; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v4
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vfmax.vv v24, v24, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vfmax.vv v16, v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.maximum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
   ret <vscale x 32 x bfloat> %v
 }
@@ -311,9 +222,9 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
@@ -344,9 +255,9 @@ define <vscale x 2 x half> @vfmax_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %v
@@ -372,14 +283,14 @@ define <vscale x 4 x half> @vfmax_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %v
@@ -405,14 +316,14 @@ define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
-; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v12, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %v
@@ -442,9 +353,9 @@ define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
 ; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
-; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
   ret <vscale x 16 x half> %v
@@ -468,71 +379,59 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv8r.v v24, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vmv8r.v v0, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vmv8r.v v0, v16
+; ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmfeq.vv v3, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFHMIN-NEXT:    vmfeq.vv v3, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v24, v16, v8, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v3
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v16, v0, v16
+; ZVFHMIN-NEXT:    vfmax.vv v24, v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmfeq.vv v7, v24, v24
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v8, v0
-; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v24
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -699,9 +598,9 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnan(<vscale x 1 x half> %a, <vscal
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call nnan <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
@@ -726,13 +625,13 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vsca
 ; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v10
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vmv1r.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
+; ZVFHMIN-NEXT:    vmv1r.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v8
+; ZVFHMIN-NEXT:    vfmax.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %c = fadd nnan <vscale x 1 x half> %a, %a
   %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x half> %b)
@@ -758,13 +657,13 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vsca
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vmv1r.v v8, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vmv1r.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v8
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %c = fadd nnan <vscale x 1 x half> %b, %b
   %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %c)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
index f6b94b41103ef..0114d73abf189 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
@@ -30,9 +30,9 @@ define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vmfeq.vv v0, v11, v11, v0.t
 ; CHECK-NEXT:    vmerge.vvm v9, v11, v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfmax.vv v9, v9, v8, v0.t
+; CHECK-NEXT:    vfmax.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.maximum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -51,9 +51,9 @@ define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
 ; CHECK-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v8, v9
+; CHECK-NEXT:    vfmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.maximum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -75,9 +75,9 @@ define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vmfeq.vv v0, v11, v11, v0.t
 ; CHECK-NEXT:    vmerge.vvm v9, v11, v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfmax.vv v9, v9, v8, v0.t
+; CHECK-NEXT:    vfmax.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.maximum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -96,9 +96,9 @@ define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %
 ; CHECK-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v8, v9
+; CHECK-NEXT:    vfmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.maximum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -114,17 +114,15 @@ define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v14, v14, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v16, v14, v12, v0
+; CHECK-NEXT:    vmfeq.vv v0, v14, v14, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v14, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v14, v0
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
+; CHECK-NEXT:    vmerge.vvm v12, v12, v14, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfmax.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vfmax.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.maximum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -138,14 +136,14 @@ define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v10, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v12, v10, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
+; CHECK-NEXT:    vmerge.vvm v12, v10, v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.maximum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -161,17 +159,15 @@ define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v20, v20, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v24, v20, v16, v0
+; CHECK-NEXT:    vmfeq.vv v0, v20, v20, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v20, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v20, v0
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; CHECK-NEXT:    vmerge.vvm v16, v16, v20, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfmax.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vfmax.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.maximum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -185,14 +181,14 @@ define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v12, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v16, v12, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
+; CHECK-NEXT:    vmerge.vvm v16, v12, v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.maximum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -203,37 +199,20 @@ declare <vscale x 16 x bfloat> @llvm.vp.maximum.nxv16bf16(<vscale x 16 x bfloat>
 define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmax_vv_nxv16bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v8, v24, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT:    vmfeq.vv v0, v24, v24, v0.t
+; CHECK-NEXT:    vmerge.vvm v16, v24, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vfmax.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.maximum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -251,9 +230,9 @@ define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v16, v16, v8
+; CHECK-NEXT:    vfmax.vv v8, v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.maximum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -273,11 +252,6 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
@@ -287,58 +261,48 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 3
+; CHECK-NEXT:    add a3, sp, a3
+; CHECK-NEXT:    addi a3, a3, 16
 ; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v5, v16, v16, v0.t
-; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmfeq.vv v5, v24, v24, v0.t
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 4
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v5
-; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    vfmax.vv v16, v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    bltu a0, a1, .LBB10_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB10_2:
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; CHECK-NEXT:    vmfeq.vv v6, v8, v8, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
@@ -346,35 +310,16 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmerge.vvm v24, v24, v16, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfmax.vv v16, v16, v24, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
@@ -399,19 +344,14 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    mul a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmset.m v16
+; CHECK-NEXT:    vmset.m v24
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v7, v16, a2
+; CHECK-NEXT:    vslidedown.vx v7, v24, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
@@ -422,32 +362,27 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    addi a3, a3, 16
 ; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v6, v16, v16, v0.t
-; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmfeq.vv v6, v24, v24, v0.t
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 4
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    vfmax.vv v16, v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
+; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    bltu a0, a1, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
@@ -527,9 +462,9 @@ define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11, v0.t
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v11, v9, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.maximum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x half> %v
@@ -558,9 +493,9 @@ define <vscale x 1 x half> @vfmax_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.maximum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x half> %v
@@ -595,9 +530,9 @@ define <vscale x 2 x half> @vfmax_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11, v0.t
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v11, v9, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.maximum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -626,9 +561,9 @@ define <vscale x 2 x half> @vfmax_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.maximum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -657,17 +592,15 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v14, v12, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v14, v14, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v12, v14, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.maximum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x half> %v
@@ -691,14 +624,14 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
-; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.maximum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x half> %v
@@ -711,12 +644,10 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vfmax.vv v8, v8, v14, v0.t
@@ -729,17 +660,15 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v20, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v20, v20, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v20, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.maximum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -763,14 +692,14 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
-; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v12, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.maximum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x half> %v
@@ -783,12 +712,10 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v17
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v16
-; ZVFH-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v17
+; ZVFH-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vfmax.vv v8, v8, v20, v0.t
@@ -796,37 +723,20 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfmax_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT:    addi sp, sp, 16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -854,9 +764,9 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
 ; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
-; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x half> %v
@@ -867,32 +777,15 @@ declare <vscale x 32 x half> @llvm.vp.maximum.nxv32f16(<vscale x 32 x half>, <vs
 define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vfmax_vv_nxv32f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    addi sp, sp, -16
-; ZVFH-NEXT:    .cfi_def_cfa_offset 16
-; ZVFH-NEXT:    csrr a1, vlenb
-; ZVFH-NEXT:    slli a1, a1, 3
-; ZVFH-NEXT:    sub sp, sp, a1
-; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vmv1r.v v7, v0
-; ZVFH-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v25
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFH-NEXT:    addi a0, sp, 16
-; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v25
+; ZVFH-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vfmax.vv v8, v8, v16, v0.t
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    add sp, sp, a0
-; ZVFH-NEXT:    .cfi_def_cfa sp, 16
-; ZVFH-NEXT:    addi sp, sp, 16
-; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    vfmax.vv v8, v8, v24, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmax_vv_nxv32f16:
@@ -906,11 +799,6 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
@@ -920,58 +808,48 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    addi a3, sp, 16
+; ZVFHMIN-NEXT:    csrr a3, vlenb
+; ZVFHMIN-NEXT:    slli a3, a3, 3
+; ZVFHMIN-NEXT:    add a3, sp, a3
+; ZVFHMIN-NEXT:    addi a3, a3, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v5, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv8r.v v8, v16
+; ZVFHMIN-NEXT:    vmfeq.vv v5, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v5
-; ZVFHMIN-NEXT:    vmv8r.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v16, v0
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
+; ZVFHMIN-NEXT:    addi a2, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB22_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB22_2:
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    add a1, sp, a1
+; ZVFHMIN-NEXT:    addi a1, a1, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v6, v8, v8, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
@@ -979,35 +857,16 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vmv1r.v v0, v6
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v24, v16, v0
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v8, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
@@ -1043,19 +902,14 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    mul a1, a1, a2
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v16
+; ZVFHMIN-NEXT:    vmset.m v24
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v7, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v7, v24, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
@@ -1066,32 +920,27 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v6, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv8r.v v8, v16
+; ZVFHMIN-NEXT:    vmfeq.vv v6, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    vmv8r.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v16, v0
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
+; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
 ; ZVFHMIN-NEXT:  # %bb.1:
@@ -1216,12 +1065,10 @@ define <vscale x 4 x float> @vfmax_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmax.vv v8, v8, v14, v0.t
@@ -1251,12 +1098,10 @@ define <vscale x 8 x float> @vfmax_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmax.vv v8, v8, v20, v0.t
@@ -1319,12 +1164,10 @@ define <vscale x 2 x double> @vfmax_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmax.vv v8, v8, v14, v0.t
@@ -1354,12 +1197,10 @@ define <vscale x 4 x double> @vfmax_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmax.vv v8, v8, v20, v0.t
@@ -1387,32 +1228,15 @@ declare <vscale x 8 x double> @llvm.vp.maximum.nxv8f64(<vscale x 8 x double>, <v
 define <vscale x 8 x double> @vfmax_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmax_vv_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfmax.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.maximum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1441,14 +1265,13 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 24
-; CHECK-NEXT:    mul a1, a1, a3
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
@@ -1458,37 +1281,25 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    vslidedown.vx v6, v0, a4
 ; CHECK-NEXT:    sub a4, a2, a1
 ; CHECK-NEXT:    add a3, a0, a3
-; CHECK-NEXT:    vl8re64.v v8, (a3)
+; CHECK-NEXT:    vl8re64.v v24, (a3)
 ; CHECK-NEXT:    sltu a3, a2, a4
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a3, a3, a4
 ; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
-; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 3
-; CHECK-NEXT:    add a3, sp, a3
-; CHECK-NEXT:    addi a3, a3, 16
-; CHECK-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vmv8r.v v24, v16
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v5, v8, v8, v0.t
+; CHECK-NEXT:    vmfeq.vv v0, v24, v24, v0.t
 ; CHECK-NEXT:    vl8re64.v v16, (a0)
-; CHECK-NEXT:    vmv1r.v v0, v5
-; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfmax.vv v8, v8, v24, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    bltu a2, a1, .LBB40_2
 ; CHECK-NEXT:  # %bb.1:
@@ -1496,31 +1307,22 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:  .LBB40_2:
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmax.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vfmax.vv v8, v8, v24, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
index dce5004d03e16..98e75bdb4f61f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
@@ -19,9 +19,9 @@ define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10
+; CHECK-NEXT:    vfmax.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = call <vscale x 1 x bfloat> @llvm.maximumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb)
   ret <vscale x 1 x bfloat> %vc
@@ -34,9 +34,9 @@ define <vscale x 1 x bfloat> @vfmax_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vf v9, v9, fa5
+; CHECK-NEXT:    vfmax.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -51,9 +51,9 @@ define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10
+; CHECK-NEXT:    vfmax.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = call <vscale x 2 x bfloat> @llvm.maximumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb)
   ret <vscale x 2 x bfloat> %vc
@@ -66,9 +66,9 @@ define <vscale x 2 x bfloat> @vfmax_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vf v9, v9, fa5
+; CHECK-NEXT:    vfmax.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -83,9 +83,9 @@ define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vv v10, v12, v10
+; CHECK-NEXT:    vfmax.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = call <vscale x 4 x bfloat> @llvm.maximumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb)
   ret <vscale x 4 x bfloat> %vc
@@ -98,9 +98,9 @@ define <vscale x 4 x bfloat> @vfmax_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vf v10, v10, fa5
+; CHECK-NEXT:    vfmax.vf v8, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -115,9 +115,9 @@ define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vv v12, v16, v12
+; CHECK-NEXT:    vfmax.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = call <vscale x 8 x bfloat> @llvm.maximumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb)
   ret <vscale x 8 x bfloat> %vc
@@ -130,9 +130,9 @@ define <vscale x 8 x bfloat> @vfmax_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vf v12, v12, fa5
+; CHECK-NEXT:    vfmax.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -147,9 +147,9 @@ define <vscale x 8 x bfloat> @vfmax_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vf v12, v12, fa5
+; CHECK-NEXT:    vfmax.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -164,9 +164,9 @@ define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v16, v24, v16
+; CHECK-NEXT:    vfmax.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = call <vscale x 16 x bfloat> @llvm.maximumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
   ret <vscale x 16 x bfloat> %vc
@@ -179,9 +179,9 @@ define <vscale x 16 x bfloat> @vfmax_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vf v16, v16, fa5
+; CHECK-NEXT:    vfmax.vf v8, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -207,9 +207,9 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v0, v0, v8
+; CHECK-NEXT:    vfmax.vv v8, v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmax.vv v16, v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -248,9 +248,9 @@ define <vscale x 32 x bfloat> @vfmax_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v0, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmax.vv v16, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -281,9 +281,9 @@ define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = call <vscale x 1 x half> @llvm.maximumnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb)
   ret <vscale x 1 x half> %vc
@@ -302,9 +302,9 @@ define <vscale x 1 x half> @vfmax_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -325,9 +325,9 @@ define <vscale x 2 x half> @vfmax_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = call <vscale x 2 x half> @llvm.maximumnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb)
   ret <vscale x 2 x half> %vc
@@ -346,9 +346,9 @@ define <vscale x 2 x half> @vfmax_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -369,9 +369,9 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = call <vscale x 4 x half> @llvm.maximumnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb)
   ret <vscale x 4 x half> %vc
@@ -390,9 +390,9 @@ define <vscale x 4 x half> @vfmax_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v10, v10, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -413,9 +413,9 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = call <vscale x 8 x half> @llvm.maximumnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb)
   ret <vscale x 8 x half> %vc
@@ -434,9 +434,9 @@ define <vscale x 8 x half> @vfmax_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -457,9 +457,9 @@ define <vscale x 8 x half> @vfmax_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -480,9 +480,9 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfmax.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = call <vscale x 16 x half> @llvm.maximumnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb)
   ret <vscale x 16 x half> %vc
@@ -501,9 +501,9 @@ define <vscale x 16 x half> @vfmax_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v16, v16, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -535,9 +535,9 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vfmax.vv v8, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -582,9 +582,9 @@ define <vscale x 32 x half> @vfmax_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmax.vv v16, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
index 919d63ca6e31a..dc0fad7026670 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
@@ -31,9 +31,9 @@ define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v8, v9
+; CHECK-NEXT:    vfmin.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.minimum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
   ret <vscale x 1 x bfloat> %v
@@ -54,9 +54,9 @@ define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v8, v9
+; CHECK-NEXT:    vfmin.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.minimum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
   ret <vscale x 2 x bfloat> %v
@@ -72,14 +72,14 @@ define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v10, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v12, v10, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
+; CHECK-NEXT:    vmerge.vvm v12, v10, v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.minimum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
   ret <vscale x 4 x bfloat> %v
@@ -95,14 +95,14 @@ define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v12, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v16, v12, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
+; CHECK-NEXT:    vmerge.vvm v16, v12, v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.minimum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %v
@@ -122,9 +122,9 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v16, v16, v8
+; CHECK-NEXT:    vfmin.vv v8, v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.minimum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
   ret <vscale x 16 x bfloat> %v
@@ -133,155 +133,66 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
 declare <vscale x 32 x bfloat> @llvm.minimum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>)
 
 define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) nounwind {
-; ZVFH-LABEL: vfmin_nxv32bf16_vv:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    addi sp, sp, -16
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    mv a1, a0
-; ZVFH-NEXT:    slli a0, a0, 1
-; ZVFH-NEXT:    add a0, a0, a1
-; ZVFH-NEXT:    sub sp, sp, a0
-; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv8r.v v24, v16
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; ZVFH-NEXT:    vmv8r.v v0, v8
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v24
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v0
-; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v3, v16, v16
-; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 4
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFH-NEXT:    vmv1r.v v0, v3
-; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
-; ZVFH-NEXT:    addi a0, sp, 16
-; ZVFH-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v12
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v4
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 4
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    addi a0, sp, 16
-; ZVFH-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFH-NEXT:    vfmin.vv v16, v0, v16
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 4
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFH-NEXT:    vmfeq.vv v7, v24, v24
-; ZVFH-NEXT:    vmerge.vvm v16, v8, v24, v0
-; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    vmerge.vvm v8, v24, v8, v0
-; ZVFH-NEXT:    vfmin.vv v16, v8, v16
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 4
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v24
-; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    mv a1, a0
-; ZVFH-NEXT:    slli a0, a0, 1
-; ZVFH-NEXT:    add a0, a0, a1
-; ZVFH-NEXT:    add sp, sp, a0
-; ZVFH-NEXT:    addi sp, sp, 16
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: vfmin_nxv32bf16_vv:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv8r.v v24, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vmv8r.v v0, v8
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v24
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmfeq.vv v3, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vmv1r.v v0, v3
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v8, v0
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v12
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v4
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v16, v0, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmfeq.vv v7, v24, v24
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v24, v0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v8, v0
-; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v24
-; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    addi sp, sp, 16
-; ZVFHMIN-NEXT:    ret
+; CHECK-LABEL: vfmin_nxv32bf16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vmv8r.v v0, v16
+; CHECK-NEXT:    vmv8r.v v24, v8
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v0
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16
+; CHECK-NEXT:    vmfeq.vv v3, v8, v8
+; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmv1r.v v0, v3
+; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v4
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vfmin.vv v24, v24, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v7, v16, v16
+; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vfmin.vv v16, v8, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.minimum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
   ret <vscale x 32 x bfloat> %v
 }
@@ -311,9 +222,9 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
@@ -344,9 +255,9 @@ define <vscale x 2 x half> @vfmin_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %v
@@ -372,14 +283,14 @@ define <vscale x 4 x half> @vfmin_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %v
@@ -405,14 +316,14 @@ define <vscale x 8 x half> @vfmin_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
-; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v12, v0
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %v
@@ -442,9 +353,9 @@ define <vscale x 16 x half> @vfmin_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
 ; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
-; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
   ret <vscale x 16 x half> %v
@@ -468,71 +379,59 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv8r.v v24, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vmv8r.v v0, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vmv8r.v v0, v16
+; ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmfeq.vv v3, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFHMIN-NEXT:    vmfeq.vv v3, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v24, v16, v8, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v3
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v16, v0, v16
+; ZVFHMIN-NEXT:    vfmin.vv v24, v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmfeq.vv v7, v24, v24
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v8, v0
-; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v24
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
@@ -699,9 +598,9 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnan(<vscale x 1 x half> %a, <vscal
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call nnan <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
@@ -726,13 +625,13 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vsca
 ; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v10
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vmv1r.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
+; ZVFHMIN-NEXT:    vmv1r.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v8
+; ZVFHMIN-NEXT:    vfmin.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %c = fadd nnan <vscale x 1 x half> %a, %a
   %v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x half> %b)
@@ -758,13 +657,13 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vsca
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vmv1r.v v8, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vmv1r.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v8
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %c = fadd nnan <vscale x 1 x half> %b, %b
   %v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %c)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
index dc2dec55c4a1a..afc38f3f2d154 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
@@ -30,9 +30,9 @@ define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vmfeq.vv v0, v11, v11, v0.t
 ; CHECK-NEXT:    vmerge.vvm v9, v11, v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfmin.vv v9, v9, v8, v0.t
+; CHECK-NEXT:    vfmin.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.minimum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -51,9 +51,9 @@ define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
 ; CHECK-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v8, v9
+; CHECK-NEXT:    vfmin.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.minimum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -75,9 +75,9 @@ define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vmfeq.vv v0, v11, v11, v0.t
 ; CHECK-NEXT:    vmerge.vvm v9, v11, v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfmin.vv v9, v9, v8, v0.t
+; CHECK-NEXT:    vfmin.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.minimum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -96,9 +96,9 @@ define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %
 ; CHECK-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; CHECK-NEXT:    vmfeq.vv v0, v8, v8
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v8, v9
+; CHECK-NEXT:    vfmin.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.minimum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -114,17 +114,15 @@ define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v14, v14, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v16, v14, v12, v0
+; CHECK-NEXT:    vmfeq.vv v0, v14, v14, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v14, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v12, v14, v0
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
+; CHECK-NEXT:    vmerge.vvm v12, v12, v14, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vfmin.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vfmin.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.minimum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -138,14 +136,14 @@ define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v10, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v12, v10, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
+; CHECK-NEXT:    vmerge.vvm v12, v10, v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.minimum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -161,17 +159,15 @@ define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v20, v20, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v24, v20, v16, v0
+; CHECK-NEXT:    vmfeq.vv v0, v20, v20, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v20, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v16, v20, v0
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; CHECK-NEXT:    vmerge.vvm v16, v16, v20, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vfmin.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vfmin.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.minimum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -185,14 +181,14 @@ define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v0, v12, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v16, v12, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
+; CHECK-NEXT:    vmerge.vvm v16, v12, v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.minimum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -203,37 +199,20 @@ declare <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat>
 define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmin_vv_nxv16bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v8, v24, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT:    vmfeq.vv v0, v24, v24, v0.t
+; CHECK-NEXT:    vmerge.vvm v16, v24, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vfmin.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -251,9 +230,9 @@ define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat
 ; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v16, v16, v8
+; CHECK-NEXT:    vfmin.vv v8, v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -273,11 +252,6 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
@@ -287,58 +261,48 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 3
+; CHECK-NEXT:    add a3, sp, a3
+; CHECK-NEXT:    addi a3, a3, 16
 ; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v5, v16, v16, v0.t
-; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmfeq.vv v5, v24, v24, v0.t
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 4
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v5
-; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    vfmin.vv v16, v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    bltu a0, a1, .LBB10_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB10_2:
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; CHECK-NEXT:    vmfeq.vv v6, v8, v8, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
@@ -346,35 +310,16 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmerge.vvm v24, v24, v16, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfmin.vv v16, v16, v24, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
@@ -399,19 +344,14 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    mul a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmset.m v16
+; CHECK-NEXT:    vmset.m v24
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v7, v16, a2
+; CHECK-NEXT:    vslidedown.vx v7, v24, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
@@ -422,32 +362,27 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    addi a3, a3, 16
 ; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v6, v16, v16, v0.t
-; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmfeq.vv v6, v24, v24, v0.t
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 4
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    vfmin.vv v16, v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
+; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    bltu a0, a1, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
@@ -527,9 +462,9 @@ define <vscale x 1 x half> @vfmin_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11, v0.t
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v11, v9, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.minimum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x half> %v
@@ -558,9 +493,9 @@ define <vscale x 1 x half> @vfmin_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.minimum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x half> %v
@@ -595,9 +530,9 @@ define <vscale x 2 x half> @vfmin_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v11, v11, v0.t
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v11, v9, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v8, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.minimum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -626,9 +561,9 @@ define <vscale x 2 x half> @vfmin_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <
 ; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.minimum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -657,17 +592,15 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v14, v14, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v14, v12, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v14, v14, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v14, v12, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v14, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v12, v14, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v10
-; ZVFHMIN-NEXT:    vfmin.vv v12, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.minimum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x half> %v
@@ -691,14 +624,14 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
-; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.minimum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x half> %v
@@ -711,12 +644,10 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
-; ZVFH-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v13
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vfmin.vv v8, v8, v14, v0.t
@@ -729,17 +660,15 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v20, v20, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v20, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v20, v20, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v20, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v20, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v20, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.minimum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -763,14 +692,14 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
-; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v12, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v12, v0
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.minimum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x half> %v
@@ -783,12 +712,10 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v17
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v16
-; ZVFH-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v17
+; ZVFH-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vfmin.vv v8, v8, v20, v0.t
@@ -796,37 +723,20 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfmin_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v24, v0
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT:    addi sp, sp, 16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -854,9 +764,9 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
 ; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
-; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x half> %v
@@ -867,32 +777,15 @@ declare <vscale x 32 x half> @llvm.vp.minimum.nxv32f16(<vscale x 32 x half>, <vs
 define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vfmin_vv_nxv32f16:
 ; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    addi sp, sp, -16
-; ZVFH-NEXT:    .cfi_def_cfa_offset 16
-; ZVFH-NEXT:    csrr a1, vlenb
-; ZVFH-NEXT:    slli a1, a1, 3
-; ZVFH-NEXT:    sub sp, sp, a1
-; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFH-NEXT:    vmv1r.v v7, v0
-; ZVFH-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v25
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFH-NEXT:    addi a0, sp, 16
-; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v25
+; ZVFH-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; ZVFH-NEXT:    vmv1r.v v0, v7
-; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vfmin.vv v8, v8, v16, v0.t
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    add sp, sp, a0
-; ZVFH-NEXT:    .cfi_def_cfa sp, 16
-; ZVFH-NEXT:    addi sp, sp, 16
-; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    vfmin.vv v8, v8, v24, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfmin_vv_nxv32f16:
@@ -906,11 +799,6 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
@@ -920,58 +808,48 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    addi a3, sp, 16
+; ZVFHMIN-NEXT:    csrr a3, vlenb
+; ZVFHMIN-NEXT:    slli a3, a3, 3
+; ZVFHMIN-NEXT:    add a3, sp, a3
+; ZVFHMIN-NEXT:    addi a3, a3, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v5, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv8r.v v8, v16
+; ZVFHMIN-NEXT:    vmfeq.vv v5, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v5
-; ZVFHMIN-NEXT:    vmv8r.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v16, v0
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
+; ZVFHMIN-NEXT:    addi a2, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB22_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB22_2:
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    add a1, sp, a1
+; ZVFHMIN-NEXT:    addi a1, a1, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v6, v8, v8, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
@@ -979,35 +857,16 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vmv1r.v v0, v6
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v24, v24, v16, v0
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v8, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
@@ -1043,19 +902,14 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    mul a1, a1, a2
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v16
+; ZVFHMIN-NEXT:    vmset.m v24
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v7, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v7, v24, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
@@ -1066,32 +920,27 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v6, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv8r.v v8, v16
+; ZVFHMIN-NEXT:    vmfeq.vv v6, v24, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
-; ZVFHMIN-NEXT:    vmv8r.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v16, v0
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v24, v8, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vmfeq.vv v12, v16, v16, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v12
-; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8, v0.t
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v24, v0
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v7
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
+; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
 ; ZVFHMIN-NEXT:  # %bb.1:
@@ -1216,12 +1065,10 @@ define <vscale x 4 x float> @vfmin_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmin.vv v8, v8, v14, v0.t
@@ -1251,12 +1098,10 @@ define <vscale x 8 x float> @vfmin_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmin.vv v8, v8, v20, v0.t
@@ -1319,12 +1164,10 @@ define <vscale x 2 x double> @vfmin_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vmfeq.vv v13, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vv v13, v10, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v13
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vfmin.vv v8, v8, v14, v0.t
@@ -1354,12 +1197,10 @@ define <vscale x 4 x double> @vfmin_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vmfeq.vv v17, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfeq.vv v17, v12, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    vmfeq.vv v0, v12, v12, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vfmin.vv v8, v8, v20, v0.t
@@ -1387,32 +1228,15 @@ declare <vscale x 8 x double> @llvm.vp.minimum.nxv8f64(<vscale x 8 x double>, <v
 define <vscale x 8 x double> @vfmin_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmin_vv_nxv8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
 ; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v8, v8, v16, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfmin.vv v8, v8, v24, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.minimum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1441,14 +1265,13 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a3, 24
-; CHECK-NEXT:    mul a1, a1, a3
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
@@ -1458,37 +1281,25 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    vslidedown.vx v6, v0, a4
 ; CHECK-NEXT:    sub a4, a2, a1
 ; CHECK-NEXT:    add a3, a0, a3
-; CHECK-NEXT:    vl8re64.v v8, (a3)
+; CHECK-NEXT:    vl8re64.v v24, (a3)
 ; CHECK-NEXT:    sltu a3, a2, a4
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a3, a3, a4
 ; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v26, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v26
-; CHECK-NEXT:    vmerge.vvm v24, v16, v8, v0
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 3
-; CHECK-NEXT:    add a3, sp, a3
-; CHECK-NEXT:    addi a3, a3, 16
-; CHECK-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vmv8r.v v24, v16
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    vmfeq.vv v5, v8, v8, v0.t
+; CHECK-NEXT:    vmfeq.vv v0, v24, v24, v0.t
 ; CHECK-NEXT:    vl8re64.v v16, (a0)
-; CHECK-NEXT:    vmv1r.v v0, v5
-; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
+; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v6
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfmin.vv v8, v8, v24, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    bltu a2, a1, .LBB40_2
 ; CHECK-NEXT:  # %bb.1:
@@ -1496,31 +1307,22 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:  .LBB40_2:
 ; CHECK-NEXT:    vmv1r.v v0, v7
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v25, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8, v0.t
 ; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
-; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v16, v16, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfmin.vv v8, v16, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vfmin.vv v8, v8, v24, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
index fcb8ad82342d5..59ddecafe72a2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
@@ -19,9 +19,9 @@ define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10
+; CHECK-NEXT:    vfmax.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = call <vscale x 1 x bfloat> @llvm.maximumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb)
   ret <vscale x 1 x bfloat> %vc
@@ -34,9 +34,9 @@ define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vf v9, v9, fa5
+; CHECK-NEXT:    vfmax.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -51,9 +51,9 @@ define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10
+; CHECK-NEXT:    vfmax.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = call <vscale x 2 x bfloat> @llvm.maximumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb)
   ret <vscale x 2 x bfloat> %vc
@@ -66,9 +66,9 @@ define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vf v9, v9, fa5
+; CHECK-NEXT:    vfmax.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -83,9 +83,9 @@ define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vv v10, v12, v10
+; CHECK-NEXT:    vfmax.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = call <vscale x 4 x bfloat> @llvm.maximumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb)
   ret <vscale x 4 x bfloat> %vc
@@ -98,9 +98,9 @@ define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vf v10, v10, fa5
+; CHECK-NEXT:    vfmax.vf v8, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -115,9 +115,9 @@ define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vv v12, v16, v12
+; CHECK-NEXT:    vfmax.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = call <vscale x 8 x bfloat> @llvm.maximumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb)
   ret <vscale x 8 x bfloat> %vc
@@ -130,9 +130,9 @@ define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vf v12, v12, fa5
+; CHECK-NEXT:    vfmax.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -147,9 +147,9 @@ define <vscale x 8 x bfloat> @vfadd_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vf v12, v12, fa5
+; CHECK-NEXT:    vfmax.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -164,9 +164,9 @@ define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v16, v24, v16
+; CHECK-NEXT:    vfmax.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = call <vscale x 16 x bfloat> @llvm.maximumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
   ret <vscale x 16 x bfloat> %vc
@@ -179,9 +179,9 @@ define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vf v16, v16, fa5
+; CHECK-NEXT:    vfmax.vf v8, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -207,9 +207,9 @@ define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v0, v0, v8
+; CHECK-NEXT:    vfmax.vv v8, v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmax.vv v16, v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -248,9 +248,9 @@ define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v0, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmax.vv v16, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -281,9 +281,9 @@ define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = call <vscale x 1 x half> @llvm.maximumnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb)
   ret <vscale x 1 x half> %vc
@@ -302,9 +302,9 @@ define <vscale x 1 x half> @vfadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -325,9 +325,9 @@ define <vscale x 2 x half> @vfadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = call <vscale x 2 x half> @llvm.maximumnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb)
   ret <vscale x 2 x half> %vc
@@ -346,9 +346,9 @@ define <vscale x 2 x half> @vfadd_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -369,9 +369,9 @@ define <vscale x 4 x half> @vfadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = call <vscale x 4 x half> @llvm.maximumnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb)
   ret <vscale x 4 x half> %vc
@@ -390,9 +390,9 @@ define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v10, v10, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -413,9 +413,9 @@ define <vscale x 8 x half> @vfadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = call <vscale x 8 x half> @llvm.maximumnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb)
   ret <vscale x 8 x half> %vc
@@ -434,9 +434,9 @@ define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -457,9 +457,9 @@ define <vscale x 8 x half> @vfadd_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -480,9 +480,9 @@ define <vscale x 16 x half> @vfadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfmax.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = call <vscale x 16 x half> @llvm.maximumnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb)
   ret <vscale x 16 x half> %vc
@@ -501,9 +501,9 @@ define <vscale x 16 x half> @vfadd_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v16, v16, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -535,9 +535,9 @@ define <vscale x 32 x half> @vfadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vfmax.vv v8, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -582,9 +582,9 @@ define <vscale x 32 x half> @vfadd_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmax.vv v16, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index f9ac53b76ebaf..4d59302a76140 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -305,9 +305,9 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfwcvt.rtz.x.f.v v10, v8
 ; CHECK-V-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-V-NEXT:    vmax.vx v10, v10, zero
+; CHECK-V-NEXT:    vmax.vx v8, v10, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
 entry:
   %conv = fptosi <4 x float> %x to <4 x i64>
@@ -481,18 +481,18 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
+; CHECK-V-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
+; CHECK-V-NEXT:    vslideup.vi v8, v10, 2
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT:    vnclip.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a1, a0, 1
 ; CHECK-V-NEXT:    add a0, a1, a0
@@ -661,18 +661,18 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
+; CHECK-V-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
+; CHECK-V-NEXT:    vslideup.vi v8, v10, 2
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a1, a0, 1
 ; CHECK-V-NEXT:    add a0, a1, a0
@@ -861,9 +861,9 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-V-NEXT:    vslideup.vi v8, v10, 2
-; CHECK-V-NEXT:    vmax.vx v10, v8, zero
+; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a1, a0, 1
 ; CHECK-V-NEXT:    add a0, a1, a0
@@ -926,9 +926,9 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) {
 ; CHECK-V-LABEL: stest_f64i16:
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT:    vnclip.wi v8, v9, 0
+; CHECK-V-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
@@ -962,9 +962,9 @@ define <2 x i16> @utest_f64i16(<2 x double> %x) {
 ; CHECK-V-LABEL: utest_f64i16:
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-V-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v9, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
 entry:
   %conv = fptoui <2 x double> %x to <2 x i32>
@@ -1000,8 +1000,8 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
 ; CHECK-V-LABEL: ustest_f64i16:
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-V-NEXT:    vmax.vx v8, v9, zero
+; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
@@ -1542,25 +1542,25 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 2
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
+; CHECK-V-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
+; CHECK-V-NEXT:    vslideup.vi v8, v10, 4
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT:    vnclip.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 2
 ; CHECK-V-NEXT:    add sp, sp, a0
@@ -1884,25 +1884,25 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 2
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
+; CHECK-V-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
+; CHECK-V-NEXT:    vslideup.vi v8, v10, 4
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 2
 ; CHECK-V-NEXT:    add sp, sp, a0
@@ -2265,9 +2265,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-V-NEXT:    vslideup.vi v8, v10, 4
-; CHECK-V-NEXT:    vmax.vx v10, v8, zero
+; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 2
 ; CHECK-V-NEXT:    add sp, sp, a0
@@ -3864,9 +3864,9 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfwcvt.rtz.x.f.v v10, v8
 ; CHECK-V-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-V-NEXT:    vmax.vx v10, v10, zero
+; CHECK-V-NEXT:    vmax.vx v8, v10, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
 entry:
   %conv = fptosi <4 x float> %x to <4 x i64>
@@ -4038,18 +4038,18 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
+; CHECK-V-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
+; CHECK-V-NEXT:    vslideup.vi v8, v10, 2
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT:    vnclip.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a1, a0, 1
 ; CHECK-V-NEXT:    add a0, a1, a0
@@ -4216,18 +4216,18 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
+; CHECK-V-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
+; CHECK-V-NEXT:    vslideup.vi v8, v10, 2
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a1, a0, 1
 ; CHECK-V-NEXT:    add a0, a1, a0
@@ -4415,9 +4415,9 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-V-NEXT:    vslideup.vi v8, v10, 2
-; CHECK-V-NEXT:    vmax.vx v10, v8, zero
+; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a1, a0, 1
 ; CHECK-V-NEXT:    add a0, a1, a0
@@ -4478,9 +4478,9 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
 ; CHECK-V-LABEL: stest_f64i16_mm:
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT:    vnclip.wi v8, v9, 0
+; CHECK-V-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
@@ -4512,9 +4512,9 @@ define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
 ; CHECK-V-LABEL: utest_f64i16_mm:
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-V-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v9, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
 entry:
   %conv = fptoui <2 x double> %x to <2 x i32>
@@ -4549,8 +4549,8 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
 ; CHECK-V-LABEL: ustest_f64i16_mm:
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-V-NEXT:    vmax.vx v8, v9, zero
+; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
@@ -5084,25 +5084,25 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 2
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
+; CHECK-V-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
+; CHECK-V-NEXT:    vslideup.vi v8, v10, 4
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT:    vnclip.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 2
 ; CHECK-V-NEXT:    add sp, sp, a0
@@ -5424,25 +5424,25 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v10, a0
+; CHECK-V-NEXT:    vmv.s.x v8, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 2
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
+; CHECK-V-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
+; CHECK-V-NEXT:    vslideup.vi v8, v10, 4
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 2
 ; CHECK-V-NEXT:    add sp, sp, a0
@@ -5804,9 +5804,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-V-NEXT:    vslideup.vi v8, v10, 4
-; CHECK-V-NEXT:    vmax.vx v10, v8, zero
+; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 2
 ; CHECK-V-NEXT:    add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
index bc45671077106..6a904a82f8822 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
@@ -56,8 +56,8 @@ define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmerge.vim v8, v10, 0, v0
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
     ret <vscale x 4 x i16> %x
@@ -69,8 +69,8 @@ define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
     ret <vscale x 8 x i16> %x
@@ -118,8 +118,8 @@ define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmerge.vim v8, v10, 0, v0
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
     ret <vscale x 2 x i32> %x
@@ -131,8 +131,8 @@ define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
     ret <vscale x 4 x i32> %x
@@ -144,8 +144,8 @@ define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vmerge.vim v8, v16, 0, v0
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
     ret <vscale x 8 x i32> %x
@@ -157,9 +157,9 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v12, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
@@ -172,9 +172,9 @@ define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v16, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
@@ -284,9 +284,9 @@ define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
@@ -299,9 +299,9 @@ define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll
index bb5ad6ba9d88a..189407bc0016a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll
@@ -56,8 +56,8 @@ define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmerge.vim v8, v10, 0, v0
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
     ret <vscale x 4 x i16> %x
@@ -69,8 +69,8 @@ define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
     ret <vscale x 8 x i16> %x
@@ -118,8 +118,8 @@ define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmerge.vim v8, v10, 0, v0
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
     ret <vscale x 2 x i32> %x
@@ -131,8 +131,8 @@ define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
     ret <vscale x 4 x i32> %x
@@ -144,8 +144,8 @@ define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vmerge.vim v8, v16, 0, v0
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
     ret <vscale x 8 x i32> %x
@@ -157,9 +157,9 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
@@ -172,9 +172,9 @@ define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v16, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
@@ -284,9 +284,9 @@ define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
@@ -299,9 +299,9 @@ define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmfne.vv v0, v8, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
index ae0542fb5b74f..261c1bf0005a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
@@ -14,8 +14,7 @@ define <vscale x 1 x i8> @trunc_nxv1f16_to_si8(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: trunc_nxv1f16_to_si8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> %x)
   %b = fptosi <vscale x 1 x half> %a to <vscale x 1 x i8>
@@ -26,8 +25,7 @@ define <vscale x 1 x i8> @trunc_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: trunc_nxv1f16_to_ui8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> %x)
   %b = fptoui <vscale x 1 x half> %a to <vscale x 1 x i8>
@@ -132,8 +130,7 @@ define <vscale x 4 x i8> @trunc_nxv4f16_to_si8(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: trunc_nxv4f16_to_si8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
   %b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i8>
@@ -144,8 +141,7 @@ define <vscale x 4 x i8> @trunc_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: trunc_nxv4f16_to_ui8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
   %b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i8>
@@ -178,8 +174,8 @@ define <vscale x 4 x i32> @trunc_nxv4f16_to_si32(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: trunc_nxv4f16_to_si32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
   %b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i32>
@@ -190,8 +186,8 @@ define <vscale x 4 x i32> @trunc_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: trunc_nxv4f16_to_ui32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
   %b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i32>
@@ -210,9 +206,9 @@ define <vscale x 4 x i64> @trunc_nxv4f16_to_si64(<vscale x 4 x half> %x) {
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
   %b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i64>
@@ -231,9 +227,9 @@ define <vscale x 4 x i64> @trunc_nxv4f16_to_ui64(<vscale x 4 x half> %x) {
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
   %b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i64>
@@ -251,9 +247,8 @@ define <vscale x 1 x i8> @ceil_nxv1f16_to_si8(<vscale x 1 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
   %b = fptosi <vscale x 1 x half> %a to <vscale x 1 x i8>
@@ -265,9 +260,8 @@ define <vscale x 1 x i8> @ceil_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
   %b = fptoui <vscale x 1 x half> %a to <vscale x 1 x i8>
@@ -453,9 +447,8 @@ define <vscale x 4 x i8> @ceil_nxv4f16_to_si8(<vscale x 4 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
   %b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i8>
@@ -467,9 +460,8 @@ define <vscale x 4 x i8> @ceil_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
   %b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i8>
@@ -506,9 +498,9 @@ define <vscale x 4 x i32> @ceil_nxv4f16_to_si32(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: ceil_nxv4f16_to_si32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
@@ -520,9 +512,9 @@ define <vscale x 4 x i32> @ceil_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: ceil_nxv4f16_to_ui32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 3
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.xu.f.v v8, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
@@ -544,9 +536,9 @@ define <vscale x 4 x i64> @ceil_nxv4f16_to_si64(<vscale x 4 x half> %x) {
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
 ; RV32-LABEL: ceil_nxv4f16_to_si64:
 ; RV32:       # %bb.0:
@@ -601,9 +593,9 @@ define <vscale x 4 x i64> @ceil_nxv4f16_to_ui64(<vscale x 4 x half> %x) {
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
 ; RV32-LABEL: ceil_nxv4f16_to_ui64:
 ; RV32:       # %bb.0:
@@ -654,8 +646,7 @@ define <vscale x 1 x i8> @rint_nxv1f16_to_si8(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: rint_nxv1f16_to_si8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half> %x)
   %b = fptosi <vscale x 1 x half> %a to <vscale x 1 x i8>
@@ -666,8 +657,7 @@ define <vscale x 1 x i8> @rint_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: rint_nxv1f16_to_ui8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half> %x)
   %b = fptoui <vscale x 1 x half> %a to <vscale x 1 x i8>
@@ -840,8 +830,7 @@ define <vscale x 4 x i8> @rint_nxv4f16_to_si8(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16_to_si8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> %x)
   %b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i8>
@@ -852,8 +841,7 @@ define <vscale x 4 x i8> @rint_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16_to_ui8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> %x)
   %b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i8>
@@ -886,8 +874,8 @@ define <vscale x 4 x i32> @rint_nxv4f16_to_si32(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16_to_si32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> %x)
   %b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i32>
@@ -898,8 +886,8 @@ define <vscale x 4 x i32> @rint_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: rint_nxv4f16_to_ui32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> %x)
   %b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i32>
@@ -918,9 +906,9 @@ define <vscale x 4 x i64> @rint_nxv4f16_to_si64(<vscale x 4 x half> %x) {
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
 ; RV32-LABEL: rint_nxv4f16_to_si64:
 ; RV32:       # %bb.0:
@@ -973,9 +961,9 @@ define <vscale x 4 x i64> @rint_nxv4f16_to_ui64(<vscale x 4 x half> %x) {
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
 ; RV32-LABEL: rint_nxv4f16_to_ui64:
 ; RV32:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
index e7b85c5d6902b..f7634d548e370 100644
--- a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
@@ -7,11 +7,11 @@ define void @interleave256(ptr %agg.result, ptr %0, ptr %1) {
 ; RV64-1024:       # %bb.0: # %entry
 ; RV64-1024-NEXT:    li a3, 128
 ; RV64-1024-NEXT:    vsetvli zero, a3, e16, m2, ta, ma
-; RV64-1024-NEXT:    vle16.v v12, (a1)
-; RV64-1024-NEXT:    vle16.v v14, (a2)
+; RV64-1024-NEXT:    vle16.v v10, (a1)
+; RV64-1024-NEXT:    vle16.v v12, (a2)
 ; RV64-1024-NEXT:    li a1, -1
-; RV64-1024-NEXT:    vwaddu.vv v8, v12, v14
-; RV64-1024-NEXT:    vwmaccu.vx v8, a1, v14
+; RV64-1024-NEXT:    vwaddu.vv v8, v10, v12
+; RV64-1024-NEXT:    vwmaccu.vx v8, a1, v12
 ; RV64-1024-NEXT:    li a1, 256
 ; RV64-1024-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; RV64-1024-NEXT:    vse16.v v8, (a0)
@@ -21,11 +21,11 @@ define void @interleave256(ptr %agg.result, ptr %0, ptr %1) {
 ; RV64-2048:       # %bb.0: # %entry
 ; RV64-2048-NEXT:    li a3, 128
 ; RV64-2048-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
-; RV64-2048-NEXT:    vle16.v v10, (a1)
-; RV64-2048-NEXT:    vle16.v v11, (a2)
+; RV64-2048-NEXT:    vle16.v v9, (a1)
+; RV64-2048-NEXT:    vle16.v v10, (a2)
 ; RV64-2048-NEXT:    li a1, -1
-; RV64-2048-NEXT:    vwaddu.vv v8, v10, v11
-; RV64-2048-NEXT:    vwmaccu.vx v8, a1, v11
+; RV64-2048-NEXT:    vwaddu.vv v8, v9, v10
+; RV64-2048-NEXT:    vwmaccu.vx v8, a1, v10
 ; RV64-2048-NEXT:    li a1, 256
 ; RV64-2048-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
 ; RV64-2048-NEXT:    vse16.v v8, (a0)
@@ -45,11 +45,11 @@ define void @interleave512(ptr %agg.result, ptr %0, ptr %1) local_unnamed_addr {
 ; RV64-1024:       # %bb.0: # %entry
 ; RV64-1024-NEXT:    li a3, 256
 ; RV64-1024-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; RV64-1024-NEXT:    vle16.v v16, (a1)
-; RV64-1024-NEXT:    vle16.v v20, (a2)
+; RV64-1024-NEXT:    vle16.v v12, (a1)
+; RV64-1024-NEXT:    vle16.v v16, (a2)
 ; RV64-1024-NEXT:    li a1, -1
-; RV64-1024-NEXT:    vwaddu.vv v8, v16, v20
-; RV64-1024-NEXT:    vwmaccu.vx v8, a1, v20
+; RV64-1024-NEXT:    vwaddu.vv v8, v12, v16
+; RV64-1024-NEXT:    vwmaccu.vx v8, a1, v16
 ; RV64-1024-NEXT:    li a1, 512
 ; RV64-1024-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
 ; RV64-1024-NEXT:    vse16.v v8, (a0)
@@ -59,11 +59,11 @@ define void @interleave512(ptr %agg.result, ptr %0, ptr %1) local_unnamed_addr {
 ; RV64-2048:       # %bb.0: # %entry
 ; RV64-2048-NEXT:    li a3, 256
 ; RV64-2048-NEXT:    vsetvli zero, a3, e16, m2, ta, ma
-; RV64-2048-NEXT:    vle16.v v12, (a1)
-; RV64-2048-NEXT:    vle16.v v14, (a2)
+; RV64-2048-NEXT:    vle16.v v10, (a1)
+; RV64-2048-NEXT:    vle16.v v12, (a2)
 ; RV64-2048-NEXT:    li a1, -1
-; RV64-2048-NEXT:    vwaddu.vv v8, v12, v14
-; RV64-2048-NEXT:    vwmaccu.vx v8, a1, v14
+; RV64-2048-NEXT:    vwaddu.vv v8, v10, v12
+; RV64-2048-NEXT:    vwmaccu.vx v8, a1, v12
 ; RV64-2048-NEXT:    li a1, 512
 ; RV64-2048-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; RV64-2048-NEXT:    vse16.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll b/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll
index c5e1852923d54..488b2cbe4765b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll
@@ -7,8 +7,8 @@ define <vscale x 16 x i1> @match_nxv16i8_v1i8(<vscale x 16 x i8> %op1, <1 x i8>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vrgather.vi v12, v10, 0
-; CHECK-NEXT:    vmseq.vv v10, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v10, v0
+; CHECK-NEXT:    vmseq.vv v8, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask)
   ret <vscale x 16 x i1> %r
@@ -21,8 +21,8 @@ define <vscale x 16 x i1> @match_nxv16i8_v2i8(<vscale x 16 x i8> %op1, <2 x i8>
 ; CHECK-NEXT:    vrgather.vi v12, v10, 1
 ; CHECK-NEXT:    vmseq.vv v14, v8, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 0
-; CHECK-NEXT:    vmseq.vv v10, v8, v12
-; CHECK-NEXT:    vmor.mm v8, v10, v14
+; CHECK-NEXT:    vmseq.vv v8, v8, v12
+; CHECK-NEXT:    vmor.mm v8, v8, v14
 ; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask)
@@ -36,14 +36,14 @@ define <vscale x 16 x i1> @match_nxv16i8_v4i8(<vscale x 16 x i8> %op1, <4 x i8>
 ; CHECK-NEXT:    vrgather.vi v12, v10, 1
 ; CHECK-NEXT:    vmseq.vv v14, v8, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 0
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v15, v14
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v12, v14
 ; CHECK-NEXT:    vrgather.vi v12, v10, 2
 ; CHECK-NEXT:    vmseq.vv v15, v8, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 3
 ; CHECK-NEXT:    vmor.mm v10, v14, v15
-; CHECK-NEXT:    vmseq.vv v11, v8, v12
-; CHECK-NEXT:    vmor.mm v8, v10, v11
+; CHECK-NEXT:    vmseq.vv v8, v8, v12
+; CHECK-NEXT:    vmor.mm v8, v10, v8
 ; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask)
@@ -57,26 +57,26 @@ define <vscale x 16 x i1> @match_nxv16i8_v8i8(<vscale x 16 x i8> %op1, <8 x i8>
 ; CHECK-NEXT:    vrgather.vi v12, v10, 1
 ; CHECK-NEXT:    vmseq.vv v14, v8, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 0
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v15, v14
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v12, v14
 ; CHECK-NEXT:    vrgather.vi v12, v10, 2
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 3
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 4
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 5
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 6
 ; CHECK-NEXT:    vmseq.vv v15, v8, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 7
 ; CHECK-NEXT:    vmor.mm v10, v14, v15
-; CHECK-NEXT:    vmseq.vv v11, v8, v12
-; CHECK-NEXT:    vmor.mm v8, v10, v11
+; CHECK-NEXT:    vmseq.vv v8, v8, v12
+; CHECK-NEXT:    vmor.mm v8, v10, v8
 ; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask)
@@ -90,50 +90,50 @@ define <vscale x 16 x i1> @match_nxv16i8_v16i8(<vscale x 16 x i8> %op1, <16 x i8
 ; CHECK-NEXT:    vrgather.vi v12, v10, 1
 ; CHECK-NEXT:    vmseq.vv v14, v8, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 0
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v15, v14
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v12, v14
 ; CHECK-NEXT:    vrgather.vi v12, v10, 2
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 3
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 4
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 5
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 6
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 7
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 8
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 9
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 10
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 11
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 12
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 13
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 14
 ; CHECK-NEXT:    vmseq.vv v15, v8, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 15
 ; CHECK-NEXT:    vmor.mm v10, v14, v15
-; CHECK-NEXT:    vmseq.vv v11, v8, v12
-; CHECK-NEXT:    vmor.mm v8, v10, v11
+; CHECK-NEXT:    vmseq.vv v8, v8, v12
+; CHECK-NEXT:    vmor.mm v8, v10, v8
 ; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask)
@@ -318,26 +318,26 @@ define <vscale x 8 x i1> @match_nxv8i16_v8i16(<vscale x 8 x i16> %op1, <8 x i16>
 ; CHECK-NEXT:    vrgather.vi v12, v10, 1
 ; CHECK-NEXT:    vmseq.vv v14, v8, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 0
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v15, v14
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v12, v14
 ; CHECK-NEXT:    vrgather.vi v12, v10, 2
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 3
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 4
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 5
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v14, v15
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v14, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 6
 ; CHECK-NEXT:    vmseq.vv v15, v8, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 7
 ; CHECK-NEXT:    vmor.mm v10, v14, v15
-; CHECK-NEXT:    vmseq.vv v11, v8, v12
-; CHECK-NEXT:    vmor.mm v8, v10, v11
+; CHECK-NEXT:    vmseq.vv v8, v8, v12
+; CHECK-NEXT:    vmor.mm v8, v10, v8
 ; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %r = tail call <vscale x 8 x i1> @llvm.experimental.vector.match(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask)
@@ -454,34 +454,34 @@ define <vscale x 16 x i1> @match_nxv16i8_v32i8(<vscale x 16 x i8> %op1, <32 x i8
 ; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset s0, -4
 ; RV32-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; RV32-NEXT:    vrgather.vi v14, v10, 1
-; RV32-NEXT:    vrgather.vi v16, v10, 0
-; RV32-NEXT:    vrgather.vi v18, v10, 2
-; RV32-NEXT:    vrgather.vi v20, v10, 3
-; RV32-NEXT:    vrgather.vi v22, v10, 4
-; RV32-NEXT:    vrgather.vi v24, v10, 5
-; RV32-NEXT:    vrgather.vi v26, v10, 6
-; RV32-NEXT:    vrgather.vi v28, v10, 7
-; RV32-NEXT:    vmseq.vv v12, v8, v14
-; RV32-NEXT:    vmseq.vv v13, v8, v16
-; RV32-NEXT:    vrgather.vi v30, v10, 8
-; RV32-NEXT:    vmseq.vv v14, v8, v18
-; RV32-NEXT:    vmseq.vv v15, v8, v20
-; RV32-NEXT:    vrgather.vi v6, v10, 9
-; RV32-NEXT:    vmseq.vv v16, v8, v22
-; RV32-NEXT:    vmseq.vv v17, v8, v24
-; RV32-NEXT:    vrgather.vi v24, v10, 10
-; RV32-NEXT:    vmseq.vv v18, v8, v26
-; RV32-NEXT:    vmseq.vv v19, v8, v28
-; RV32-NEXT:    vrgather.vi v26, v10, 11
-; RV32-NEXT:    vmseq.vv v20, v8, v30
-; RV32-NEXT:    vmseq.vv v21, v8, v6
-; RV32-NEXT:    vrgather.vi v28, v10, 12
-; RV32-NEXT:    vmseq.vv v22, v8, v24
-; RV32-NEXT:    vmseq.vv v23, v8, v26
-; RV32-NEXT:    vrgather.vi v26, v10, 13
-; RV32-NEXT:    vmseq.vv v25, v8, v28
-; RV32-NEXT:    vmseq.vv v24, v8, v26
+; RV32-NEXT:    vrgather.vi v12, v10, 1
+; RV32-NEXT:    vrgather.vi v14, v10, 0
+; RV32-NEXT:    vrgather.vi v16, v10, 2
+; RV32-NEXT:    vrgather.vi v18, v10, 3
+; RV32-NEXT:    vrgather.vi v20, v10, 4
+; RV32-NEXT:    vrgather.vi v22, v10, 5
+; RV32-NEXT:    vrgather.vi v24, v10, 6
+; RV32-NEXT:    vrgather.vi v26, v10, 7
+; RV32-NEXT:    vmseq.vv v12, v8, v12
+; RV32-NEXT:    vmseq.vv v13, v8, v14
+; RV32-NEXT:    vrgather.vi v28, v10, 8
+; RV32-NEXT:    vmseq.vv v14, v8, v16
+; RV32-NEXT:    vmseq.vv v15, v8, v18
+; RV32-NEXT:    vrgather.vi v30, v10, 9
+; RV32-NEXT:    vmseq.vv v16, v8, v20
+; RV32-NEXT:    vmseq.vv v17, v8, v22
+; RV32-NEXT:    vrgather.vi v22, v10, 10
+; RV32-NEXT:    vmseq.vv v18, v8, v24
+; RV32-NEXT:    vmseq.vv v19, v8, v26
+; RV32-NEXT:    vrgather.vi v24, v10, 11
+; RV32-NEXT:    vmseq.vv v20, v8, v28
+; RV32-NEXT:    vmseq.vv v21, v8, v30
+; RV32-NEXT:    vrgather.vi v26, v10, 12
+; RV32-NEXT:    vmseq.vv v22, v8, v22
+; RV32-NEXT:    vmseq.vv v23, v8, v24
+; RV32-NEXT:    vrgather.vi v28, v10, 13
+; RV32-NEXT:    vmseq.vv v25, v8, v26
+; RV32-NEXT:    vmseq.vv v24, v8, v28
 ; RV32-NEXT:    vslidedown.vi v26, v10, 16
 ; RV32-NEXT:    vmv.x.s a0, v26
 ; RV32-NEXT:    vslidedown.vi v26, v10, 17
@@ -563,8 +563,8 @@ define <vscale x 16 x i1> @match_nxv16i8_v32i8(<vscale x 16 x i8> %op1, <32 x i8
 ; RV32-NEXT:    vmor.mm v10, v10, v12
 ; RV32-NEXT:    vmor.mm v10, v10, v13
 ; RV32-NEXT:    vmor.mm v10, v10, v14
-; RV32-NEXT:    vmseq.vx v11, v8, s0
-; RV32-NEXT:    vmor.mm v8, v10, v11
+; RV32-NEXT:    vmseq.vx v8, v8, s0
+; RV32-NEXT:    vmor.mm v8, v10, v8
 ; RV32-NEXT:    vmand.mm v0, v8, v0
 ; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    .cfi_restore s0
@@ -579,34 +579,34 @@ define <vscale x 16 x i1> @match_nxv16i8_v32i8(<vscale x 16 x i8> %op1, <32 x i8
 ; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset s0, -8
 ; RV64-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; RV64-NEXT:    vrgather.vi v14, v10, 1
-; RV64-NEXT:    vrgather.vi v16, v10, 0
-; RV64-NEXT:    vrgather.vi v18, v10, 2
-; RV64-NEXT:    vrgather.vi v20, v10, 3
-; RV64-NEXT:    vrgather.vi v22, v10, 4
-; RV64-NEXT:    vrgather.vi v24, v10, 5
-; RV64-NEXT:    vrgather.vi v26, v10, 6
-; RV64-NEXT:    vrgather.vi v28, v10, 7
-; RV64-NEXT:    vmseq.vv v12, v8, v14
-; RV64-NEXT:    vmseq.vv v13, v8, v16
-; RV64-NEXT:    vrgather.vi v30, v10, 8
-; RV64-NEXT:    vmseq.vv v14, v8, v18
-; RV64-NEXT:    vmseq.vv v15, v8, v20
-; RV64-NEXT:    vrgather.vi v6, v10, 9
-; RV64-NEXT:    vmseq.vv v16, v8, v22
-; RV64-NEXT:    vmseq.vv v17, v8, v24
-; RV64-NEXT:    vrgather.vi v24, v10, 10
-; RV64-NEXT:    vmseq.vv v18, v8, v26
-; RV64-NEXT:    vmseq.vv v19, v8, v28
-; RV64-NEXT:    vrgather.vi v26, v10, 11
-; RV64-NEXT:    vmseq.vv v20, v8, v30
-; RV64-NEXT:    vmseq.vv v21, v8, v6
-; RV64-NEXT:    vrgather.vi v28, v10, 12
-; RV64-NEXT:    vmseq.vv v22, v8, v24
-; RV64-NEXT:    vmseq.vv v23, v8, v26
-; RV64-NEXT:    vrgather.vi v26, v10, 13
-; RV64-NEXT:    vmseq.vv v25, v8, v28
-; RV64-NEXT:    vmseq.vv v24, v8, v26
+; RV64-NEXT:    vrgather.vi v12, v10, 1
+; RV64-NEXT:    vrgather.vi v14, v10, 0
+; RV64-NEXT:    vrgather.vi v16, v10, 2
+; RV64-NEXT:    vrgather.vi v18, v10, 3
+; RV64-NEXT:    vrgather.vi v20, v10, 4
+; RV64-NEXT:    vrgather.vi v22, v10, 5
+; RV64-NEXT:    vrgather.vi v24, v10, 6
+; RV64-NEXT:    vrgather.vi v26, v10, 7
+; RV64-NEXT:    vmseq.vv v12, v8, v12
+; RV64-NEXT:    vmseq.vv v13, v8, v14
+; RV64-NEXT:    vrgather.vi v28, v10, 8
+; RV64-NEXT:    vmseq.vv v14, v8, v16
+; RV64-NEXT:    vmseq.vv v15, v8, v18
+; RV64-NEXT:    vrgather.vi v30, v10, 9
+; RV64-NEXT:    vmseq.vv v16, v8, v20
+; RV64-NEXT:    vmseq.vv v17, v8, v22
+; RV64-NEXT:    vrgather.vi v22, v10, 10
+; RV64-NEXT:    vmseq.vv v18, v8, v24
+; RV64-NEXT:    vmseq.vv v19, v8, v26
+; RV64-NEXT:    vrgather.vi v24, v10, 11
+; RV64-NEXT:    vmseq.vv v20, v8, v28
+; RV64-NEXT:    vmseq.vv v21, v8, v30
+; RV64-NEXT:    vrgather.vi v26, v10, 12
+; RV64-NEXT:    vmseq.vv v22, v8, v22
+; RV64-NEXT:    vmseq.vv v23, v8, v24
+; RV64-NEXT:    vrgather.vi v28, v10, 13
+; RV64-NEXT:    vmseq.vv v25, v8, v26
+; RV64-NEXT:    vmseq.vv v24, v8, v28
 ; RV64-NEXT:    vslidedown.vi v26, v10, 16
 ; RV64-NEXT:    vmv.x.s a0, v26
 ; RV64-NEXT:    vslidedown.vi v26, v10, 17
@@ -688,8 +688,8 @@ define <vscale x 16 x i1> @match_nxv16i8_v32i8(<vscale x 16 x i8> %op1, <32 x i8
 ; RV64-NEXT:    vmor.mm v10, v10, v12
 ; RV64-NEXT:    vmor.mm v10, v10, v13
 ; RV64-NEXT:    vmor.mm v10, v10, v14
-; RV64-NEXT:    vmseq.vx v11, v8, s0
-; RV64-NEXT:    vmor.mm v8, v10, v11
+; RV64-NEXT:    vmseq.vx v8, v8, s0
+; RV64-NEXT:    vmor.mm v8, v10, v8
 ; RV64-NEXT:    vmand.mm v0, v8, v0
 ; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    .cfi_restore s0
@@ -965,14 +965,14 @@ define <vscale x 4 x i1> @match_nxv4xi32_v4i32(<vscale x 4 x i32> %op1, <4 x i32
 ; CHECK-NEXT:    vrgather.vi v12, v10, 1
 ; CHECK-NEXT:    vmseq.vv v14, v8, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 0
-; CHECK-NEXT:    vmseq.vv v15, v8, v12
-; CHECK-NEXT:    vmor.mm v14, v15, v14
+; CHECK-NEXT:    vmseq.vv v12, v8, v12
+; CHECK-NEXT:    vmor.mm v14, v12, v14
 ; CHECK-NEXT:    vrgather.vi v12, v10, 2
 ; CHECK-NEXT:    vmseq.vv v15, v8, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 3
 ; CHECK-NEXT:    vmor.mm v10, v14, v15
-; CHECK-NEXT:    vmseq.vv v11, v8, v12
-; CHECK-NEXT:    vmor.mm v8, v10, v11
+; CHECK-NEXT:    vmseq.vv v8, v8, v12
+; CHECK-NEXT:    vmor.mm v8, v10, v8
 ; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %r = tail call <vscale x 4 x i1> @llvm.experimental.vector.match(<vscale x 4 x i32> %op1, <4 x i32> %op2, <vscale x 4 x i1> %mask)
@@ -986,8 +986,8 @@ define <vscale x 2 x i1> @match_nxv2xi64_v2i64(<vscale x 2 x i64> %op1, <2 x i64
 ; CHECK-NEXT:    vrgather.vi v12, v10, 1
 ; CHECK-NEXT:    vmseq.vv v14, v8, v12
 ; CHECK-NEXT:    vrgather.vi v12, v10, 0
-; CHECK-NEXT:    vmseq.vv v10, v8, v12
-; CHECK-NEXT:    vmor.mm v8, v10, v14
+; CHECK-NEXT:    vmseq.vv v8, v8, v12
+; CHECK-NEXT:    vmor.mm v8, v8, v14
 ; CHECK-NEXT:    vmand.mm v0, v8, v0
 ; CHECK-NEXT:    ret
   %r = tail call <vscale x 2 x i1> @llvm.experimental.vector.match(<vscale x 2 x i64> %op1, <2 x i64> %op2, <vscale x 2 x i1> %mask)
diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll
index 2a46a0bdb8655..4fff8e32db87a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll
@@ -6,9 +6,9 @@ define <vscale x 4 x i5> @trunc_nxv4i32_to_nxv4i5(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: trunc_nxv4i32_to_nxv4i5:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %v = trunc <vscale x 4 x i32> %a to <vscale x 4 x i5>
   ret <vscale x 4 x i5> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll
index 66c64c6b275f4..ac1b8edf26869 100644
--- a/llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll
@@ -20,8 +20,8 @@ define <vscale x 2 x i64> @llrint_nxv2i64_nxv2f32(<vscale x 2 x float> %x) {
 ; CHECK-LABEL: llrint_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f32(<vscale x 2 x float> %x)
   ret <vscale x 2 x i64> %a
@@ -32,8 +32,8 @@ define <vscale x 4 x i64> @llrint_nxv4i64_nxv4f32(<vscale x 4 x float> %x) {
 ; CHECK-LABEL: llrint_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v10
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f32(<vscale x 4 x float> %x)
   ret <vscale x 4 x i64> %a
@@ -44,8 +44,8 @@ define <vscale x 8 x i64> @llrint_nxv8i64_nxv8f32(<vscale x 8 x float> %x) {
 ; CHECK-LABEL: llrint_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v12
 ; CHECK-NEXT:    ret
   %a = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f32(<vscale x 8 x float> %x)
   ret <vscale x 8 x i64> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll
index dbe2d03e1a909..cbd5c2a04a796 100644
--- a/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll
@@ -20,8 +20,8 @@ define <vscale x 2 x i64> @llrint_nxv2i64_nxv2f32(<vscale x 2 x float> %x, <vsca
 ; CHECK-LABEL: llrint_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x i64> @llvm.vp.llrint.nxv2i64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %a
@@ -32,8 +32,8 @@ define <vscale x 4 x i64> @llrint_nxv4i64_nxv4f32(<vscale x 4 x float> %x, <vsca
 ; CHECK-LABEL: llrint_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v12, v0.t
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i64> %a
@@ -44,8 +44,8 @@ define <vscale x 8 x i64> @llrint_nxv8i64_nxv8f32(<vscale x 8 x float> %x, <vsca
 ; CHECK-LABEL: llrint_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 8 x i64> @llvm.vp.llrint.nxv8i64.nxv8f32(<vscale x 8 x float> %x, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i64> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/load-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/load-bf16.ll
index 1108bb16b6712..c8e64f16710af 100644
--- a/llvm/test/CodeGen/RISCV/rvv/load-bf16.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/load-bf16.ll
@@ -61,9 +61,9 @@ define <vscale x 32 x bfloat> @load_nxv32bf16(ptr %p) {
 define <vscale x 4 x float> @extload(ptr %p) {
 ; CHECK-LABEL: extload:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl1re16.v v10, (a0)
+; CHECK-NEXT:    vl1re16.v v9, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %x = load <vscale x 4 x bfloat>, ptr %p
   %y = fpext <vscale x 4 x bfloat> %x to <vscale x 4 x float>
diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll
index 534fdf21675ce..6d31645d7cb8e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll
@@ -46,8 +46,8 @@ define <vscale x 2 x iXLen> @lrint_nxv2f32(<vscale x 2 x float> %x) {
 ; RV64-i64-LABEL: lrint_nxv2f32:
 ; RV64-i64:       # %bb.0:
 ; RV64-i64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV64-i64-NEXT:    vmv1r.v v10, v8
-; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v10
+; RV64-i64-NEXT:    vmv1r.v v9, v8
+; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v9
 ; RV64-i64-NEXT:    ret
   %a = call <vscale x 2 x iXLen> @llvm.lrint.nxv2iXLen.nxv2f32(<vscale x 2 x float> %x)
   ret <vscale x 2 x iXLen> %a
@@ -70,8 +70,8 @@ define <vscale x 4 x iXLen> @lrint_nxv4f32(<vscale x 4 x float> %x) {
 ; RV64-i64-LABEL: lrint_nxv4f32:
 ; RV64-i64:       # %bb.0:
 ; RV64-i64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-i64-NEXT:    vmv2r.v v12, v8
-; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v12
+; RV64-i64-NEXT:    vmv2r.v v10, v8
+; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v10
 ; RV64-i64-NEXT:    ret
   %a = call <vscale x 4 x iXLen> @llvm.lrint.nxv4iXLen.nxv4f32(<vscale x 4 x float> %x)
   ret <vscale x 4 x iXLen> %a
@@ -94,8 +94,8 @@ define <vscale x 8 x iXLen> @lrint_nxv8f32(<vscale x 8 x float> %x) {
 ; RV64-i64-LABEL: lrint_nxv8f32:
 ; RV64-i64:       # %bb.0:
 ; RV64-i64-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV64-i64-NEXT:    vmv4r.v v16, v8
-; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v16
+; RV64-i64-NEXT:    vmv4r.v v12, v8
+; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v12
 ; RV64-i64-NEXT:    ret
   %a = call <vscale x 8 x iXLen> @llvm.lrint.nxv8iXLen.nxv8f32(<vscale x 8 x float> %x)
   ret <vscale x 8 x iXLen> %a
@@ -131,15 +131,13 @@ define <vscale x 1 x iXLen> @lrint_nxv1f64(<vscale x 1 x double> %x) {
 ; RV32-LABEL: lrint_nxv1f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v9, v8
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-i32-LABEL: lrint_nxv1f64:
 ; RV64-i32:       # %bb.0:
 ; RV64-i32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-i32-NEXT:    vfncvt.x.f.w v9, v8
-; RV64-i32-NEXT:    vmv1r.v v8, v9
+; RV64-i32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_nxv1f64:
@@ -156,15 +154,13 @@ define <vscale x 2 x iXLen> @lrint_nxv2f64(<vscale x 2 x double> %x) {
 ; RV32-LABEL: lrint_nxv2f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v10, v8
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-i32-LABEL: lrint_nxv2f64:
 ; RV64-i32:       # %bb.0:
 ; RV64-i32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV64-i32-NEXT:    vfncvt.x.f.w v10, v8
-; RV64-i32-NEXT:    vmv.v.v v8, v10
+; RV64-i32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_nxv2f64:
@@ -181,15 +177,13 @@ define <vscale x 4 x iXLen> @lrint_nxv4f64(<vscale x 4 x double> %x) {
 ; RV32-LABEL: lrint_nxv4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v12, v8
-; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-i32-LABEL: lrint_nxv4f64:
 ; RV64-i32:       # %bb.0:
 ; RV64-i32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-i32-NEXT:    vfncvt.x.f.w v12, v8
-; RV64-i32-NEXT:    vmv.v.v v8, v12
+; RV64-i32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_nxv4f64:
@@ -206,15 +200,13 @@ define <vscale x 8 x iXLen> @lrint_nxv8f64(<vscale x 8 x double> %x) {
 ; RV32-LABEL: lrint_nxv8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v16, v8
-; RV32-NEXT:    vmv.v.v v8, v16
+; RV32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-i32-LABEL: lrint_nxv8f64:
 ; RV64-i32:       # %bb.0:
 ; RV64-i32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV64-i32-NEXT:    vfncvt.x.f.w v16, v8
-; RV64-i32-NEXT:    vmv.v.v v8, v16
+; RV64-i32-NEXT:    vfncvt.x.f.w v8, v8
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_nxv8f64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
index 98d32b36c23c1..57897c92885e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
@@ -46,8 +46,8 @@ define <vscale x 2 x iXLen> @lrint_nxv2f32(<vscale x 2 x float> %x, <vscale x 2
 ; RV64-i64-LABEL: lrint_nxv2f32:
 ; RV64-i64:       # %bb.0:
 ; RV64-i64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-i64-NEXT:    vmv1r.v v10, v8
-; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
+; RV64-i64-NEXT:    vmv1r.v v9, v8
+; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v9, v0.t
 ; RV64-i64-NEXT:    ret
   %a = call <vscale x 2 x iXLen> @llvm.vp.lrint.nxv2iXLen.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x iXLen> %a
@@ -70,8 +70,8 @@ define <vscale x 4 x iXLen> @lrint_nxv4f32(<vscale x 4 x float> %x, <vscale x 4
 ; RV64-i64-LABEL: lrint_nxv4f32:
 ; RV64-i64:       # %bb.0:
 ; RV64-i64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-i64-NEXT:    vmv2r.v v12, v8
-; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v12, v0.t
+; RV64-i64-NEXT:    vmv2r.v v10, v8
+; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
 ; RV64-i64-NEXT:    ret
   %a = call <vscale x 4 x iXLen> @llvm.vp.lrint.nxv4iXLen.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x iXLen> %a
@@ -94,8 +94,8 @@ define <vscale x 8 x iXLen> @lrint_nxv8f32(<vscale x 8 x float> %x, <vscale x 8
 ; RV64-i64-LABEL: lrint_nxv8f32:
 ; RV64-i64:       # %bb.0:
 ; RV64-i64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-i64-NEXT:    vmv4r.v v16, v8
-; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v16, v0.t
+; RV64-i64-NEXT:    vmv4r.v v12, v8
+; RV64-i64-NEXT:    vfwcvt.x.f.v v8, v12, v0.t
 ; RV64-i64-NEXT:    ret
   %a = call <vscale x 8 x iXLen> @llvm.vp.lrint.nxv8iXLen.nxv8f32(<vscale x 8 x float> %x, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x iXLen> %a
@@ -146,15 +146,13 @@ define <vscale x 1 x iXLen> @lrint_nxv1f64(<vscale x 1 x double> %x, <vscale x 1
 ; RV32-LABEL: lrint_nxv1f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-i32-LABEL: lrint_nxv1f64:
 ; RV64-i32:       # %bb.0:
 ; RV64-i32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-i32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
-; RV64-i32-NEXT:    vmv1r.v v8, v9
+; RV64-i32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_nxv1f64:
@@ -171,15 +169,13 @@ define <vscale x 2 x iXLen> @lrint_nxv2f64(<vscale x 2 x double> %x, <vscale x 2
 ; RV32-LABEL: lrint_nxv2f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v10, v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-i32-LABEL: lrint_nxv2f64:
 ; RV64-i32:       # %bb.0:
 ; RV64-i32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-i32-NEXT:    vfncvt.x.f.w v10, v8, v0.t
-; RV64-i32-NEXT:    vmv.v.v v8, v10
+; RV64-i32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_nxv2f64:
@@ -196,15 +192,13 @@ define <vscale x 4 x iXLen> @lrint_nxv4f64(<vscale x 4 x double> %x, <vscale x 4
 ; RV32-LABEL: lrint_nxv4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v12, v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-i32-LABEL: lrint_nxv4f64:
 ; RV64-i32:       # %bb.0:
 ; RV64-i32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-i32-NEXT:    vfncvt.x.f.w v12, v8, v0.t
-; RV64-i32-NEXT:    vmv.v.v v8, v12
+; RV64-i32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_nxv4f64:
@@ -221,15 +215,13 @@ define <vscale x 8 x iXLen> @lrint_nxv8f64(<vscale x 8 x double> %x, <vscale x 8
 ; RV32-LABEL: lrint_nxv8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vfncvt.x.f.w v16, v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v16
+; RV32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-i32-LABEL: lrint_nxv8f64:
 ; RV64-i32:       # %bb.0:
 ; RV64-i32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-i32-NEXT:    vfncvt.x.f.w v16, v8, v0.t
-; RV64-i32-NEXT:    vmv.v.v v8, v16
+; RV64-i32-NEXT:    vfncvt.x.f.w v8, v8, v0.t
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_nxv8f64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll b/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll
index 4b56518a89d69..8725e5f22c6b3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll
@@ -1072,8 +1072,7 @@ define <vscale x 1 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1f16(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1f16(
@@ -1095,8 +1094,7 @@ define <vscale x 1 x half> @intrinsic_vfncvt_mask_f.x.w_nxv1f16_nxv1i32(<vscale
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv1f16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv1f16.nxv1i32(
@@ -1118,8 +1116,7 @@ define <vscale x 1 x half> @intrinsic_vfncvt_mask_f.f.w_nxv1f16_nxv1f32(<vscale
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv1f16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv1f16.nxv1f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
index 2757e140ecde5..e110672c72337 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -204,15 +204,13 @@ define <vscale x 4 x i8> @mgather_truemask_nxv4i8(<vscale x 4 x ptr> %ptrs, <vsc
 ; RV32-LABEL: mgather_truemask_nxv4i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8
-; RV32-NEXT:    vmv1r.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_nxv4i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8
-; RV64-NEXT:    vmv1r.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x i8> %passthru)
   ret <vscale x 4 x i8> %v
@@ -427,15 +425,13 @@ define <vscale x 4 x i16> @mgather_truemask_nxv4i16(<vscale x 4 x ptr> %ptrs, <v
 ; RV32-LABEL: mgather_truemask_nxv4i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_nxv4i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x i16> %passthru)
   ret <vscale x 4 x i16> %v
@@ -610,10 +606,9 @@ define <vscale x 2 x i64> @mgather_nxv2i32_sextload_nxv2i64(<vscale x 2 x ptr> %
 ; RV32-LABEL: mgather_nxv2i32_sextload_nxv2i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV32-NEXT:    vmv1r.v v10, v9
-; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
+; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT:    vsext.vf2 v8, v10
+; RV32-NEXT:    vsext.vf2 v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_nxv2i32_sextload_nxv2i64:
@@ -632,10 +627,9 @@ define <vscale x 2 x i64> @mgather_nxv2i32_zextload_nxv2i64(<vscale x 2 x ptr> %
 ; RV32-LABEL: mgather_nxv2i32_zextload_nxv2i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV32-NEXT:    vmv1r.v v10, v9
-; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
+; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT:    vzext.vf2 v8, v10
+; RV32-NEXT:    vzext.vf2 v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_nxv2i32_zextload_nxv2i64:
@@ -680,8 +674,7 @@ define <vscale x 4 x i32> @mgather_truemask_nxv4i32(<vscale x 4 x ptr> %ptrs, <v
 ; RV64-LABEL: mgather_truemask_nxv4i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x i32> %passthru)
   ret <vscale x 4 x i32> %v
@@ -941,8 +934,8 @@ define <vscale x 4 x i64> @mgather_truemask_nxv4i64(<vscale x 4 x ptr> %ptrs, <v
 ; RV32-LABEL: mgather_truemask_nxv4i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v12
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_nxv4i64:
@@ -1192,8 +1185,8 @@ define <vscale x 8 x i64> @mgather_baseidx_nxv8i64(ptr %base, <vscale x 8 x i64>
 ; RV32-LABEL: mgather_baseidx_nxv8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
-; RV32-NEXT:    vsll.vi v8, v24, 3
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -1326,15 +1319,13 @@ define <vscale x 4 x bfloat> @mgather_truemask_nxv4bf16(<vscale x 4 x ptr> %ptrs
 ; RV32-LABEL: mgather_truemask_nxv4bf16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_nxv4bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x bfloat> %passthru)
   ret <vscale x 4 x bfloat> %v
@@ -1529,15 +1520,13 @@ define <vscale x 4 x half> @mgather_truemask_nxv4f16(<vscale x 4 x ptr> %ptrs, <
 ; RV32-LABEL: mgather_truemask_nxv4f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_nxv4f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x half> %passthru)
   ret <vscale x 4 x half> %v
@@ -1738,8 +1727,7 @@ define <vscale x 4 x float> @mgather_truemask_nxv4f32(<vscale x 4 x ptr> %ptrs,
 ; RV64-LABEL: mgather_truemask_nxv4f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x float> %passthru)
   ret <vscale x 4 x float> %v
@@ -1999,8 +1987,8 @@ define <vscale x 4 x double> @mgather_truemask_nxv4f64(<vscale x 4 x ptr> %ptrs,
 ; RV32-LABEL: mgather_truemask_nxv4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v12
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_nxv4f64:
@@ -2250,8 +2238,8 @@ define <vscale x 8 x double> @mgather_baseidx_nxv8f64(ptr %base, <vscale x 8 x i
 ; RV32-LABEL: mgather_baseidx_nxv8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
-; RV32-NEXT:    vsll.vi v8, v24, 3
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
index ac26a014aaa64..e97a6c7521b0b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
@@ -10,10 +10,10 @@ define void @complex_gep(ptr %p, <vscale x 2 x i64> %vec.ind, <vscale x 2 x i1>
 ; RV32-LABEL: complex_gep:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v10, v8, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    li a1, 48
 ; RV32-NEXT:    addi a0, a0, 28
-; RV32-NEXT:    vmul.vx v8, v10, a1
+; RV32-NEXT:    vmul.vx v8, v8, a1
 ; RV32-NEXT:    vmv.v.i v9, 0
 ; RV32-NEXT:    vsoxei32.v v9, (a0), v8, v0.t
 ; RV32-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
index 11229edc9fa46..a7daaab9f6de9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
@@ -94,9 +94,9 @@ define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale
 ; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v11, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
@@ -105,9 +105,9 @@ define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale
 ; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
@@ -257,18 +257,18 @@ define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscal
 ; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v11, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
@@ -458,15 +458,15 @@ define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscal
 ; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
-; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
-; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
   call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %tval, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
@@ -963,8 +963,8 @@ define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscal
 ; RV32-LABEL: mscatter_baseidx_nxv8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
-; RV32-NEXT:    vsll.vi v16, v24, 3
+; RV32-NEXT:    vnsrl.wi v16, v16, 0
+; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1862,8 +1862,8 @@ define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vs
 ; RV32-LABEL: mscatter_baseidx_nxv8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
-; RV32-NEXT:    vsll.vi v16, v24, 3
+; RV32-NEXT:    vnsrl.wi v16, v16, 0
+; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1949,11 +1949,11 @@ define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double
 define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, ptr %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vl2r.v v6, (a1)
+; RV32-NEXT:    vl2r.v v30, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    srli a1, a1, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsext.vf4 v24, v6
+; RV32-NEXT:    vsext.vf4 v24, v30
 ; RV32-NEXT:    vsll.vi v24, v24, 3
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
@@ -1989,10 +1989,10 @@ define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vsc
 define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vl4re16.v v4, (a1)
+; RV32-NEXT:    vl4re16.v v28, (a1)
 ; RV32-NEXT:    li a1, 8
 ; RV32-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
-; RV32-NEXT:    vwmulsu.vx v24, v4, a1
+; RV32-NEXT:    vwmulsu.vx v24, v28, a1
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    srli a1, a1, 3
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
index 557e809f4f9a8..984dc11013737 100644
--- a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
@@ -12,9 +12,9 @@ define <vscale x 4 x i32> @test_vloxei(ptr %ptr, <vscale x 4 x i8> %offset, i64
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
-; CHECK-NEXT:    vsll.vi v12, v12, 4
+; CHECK-NEXT:    vsll.vi v8, v12, 4
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vloxei64.v v8, (a0), v12
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
@@ -32,9 +32,9 @@ define <vscale x 4 x i32> @test_vloxei2(ptr %ptr, <vscale x 4 x i8> %offset, i64
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
-; CHECK-NEXT:    vsll.vi v12, v12, 14
+; CHECK-NEXT:    vsll.vi v8, v12, 14
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vloxei64.v v8, (a0), v12
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
@@ -52,9 +52,9 @@ define <vscale x 4 x i32> @test_vloxei3(ptr %ptr, <vscale x 4 x i8> %offset, i64
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
-; CHECK-NEXT:    vsll.vi v12, v12, 26
+; CHECK-NEXT:    vsll.vi v8, v12, 26
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vloxei64.v v8, (a0), v12
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
@@ -74,9 +74,9 @@ define <vscale x 4 x i32> @test_vloxei4(ptr %ptr, <vscale x 4 x i8> %offset, <vs
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8, v0.t
-; CHECK-NEXT:    vsll.vi v12, v12, 4
+; CHECK-NEXT:    vsll.vi v8, v12, 4
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vloxei64.v v8, (a0), v12
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %offset.ext = call <vscale x 4 x i64> @llvm.vp.zext.nxvi64.nxv1i8(<vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i32 %vl)
@@ -101,9 +101,9 @@ define <vscale x 4 x i32> @test_vloxei5(ptr %ptr, <vscale x 4 x i8> %offset, i64
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v9, v8
-; CHECK-NEXT:    vsll.vi v10, v9, 12
+; CHECK-NEXT:    vsll.vi v9, v9, 12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vloxei16.v v8, (a0), v10
+; CHECK-NEXT:    vloxei16.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i16>
@@ -124,9 +124,9 @@ define <vscale x 4 x i32> @test_vloxei6(ptr %ptr, <vscale x 4 x i7> %offset, i64
 ; CHECK-NEXT:    vand.vx v12, v8, a2
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v8, v12
-; CHECK-NEXT:    vsll.vi v12, v8, 4
+; CHECK-NEXT:    vsll.vi v8, v8, 4
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vloxei64.v v8, (a0), v12
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %offset.ext = zext <vscale x 4 x i7> %offset to <vscale x 4 x i64>
@@ -145,9 +145,9 @@ define <vscale x 4 x i32> @test_vloxei7(ptr %ptr, <vscale x 4 x i1> %offset, i64
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsll.vi v12, v8, 2
+; CHECK-NEXT:    vsll.vi v8, v8, 2
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vloxei64.v v8, (a0), v12
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %offset.ext = zext <vscale x 4 x i1> %offset to <vscale x 4 x i64>
@@ -173,9 +173,9 @@ define <vscale x 4 x i32> @test_vloxei_mask(ptr %ptr, <vscale x 4 x i8> %offset,
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
-; CHECK-NEXT:    vsll.vi v12, v12, 4
+; CHECK-NEXT:    vsll.vi v8, v12, 4
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vloxei64.v v8, (a0), v12, v0.t
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
@@ -200,9 +200,9 @@ define <vscale x 4 x i32> @test_vluxei(ptr %ptr, <vscale x 4 x i8> %offset, i64
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
-; CHECK-NEXT:    vsll.vi v12, v12, 4
+; CHECK-NEXT:    vsll.vi v8, v12, 4
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vluxei64.v v8, (a0), v12
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
@@ -228,9 +228,9 @@ define <vscale x 4 x i32> @test_vluxei_mask(ptr %ptr, <vscale x 4 x i8> %offset,
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v12, v8
-; CHECK-NEXT:    vsll.vi v12, v12, 4
+; CHECK-NEXT:    vsll.vi v8, v12, 4
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
index 9bb5717d6fc25..3c012fdecbd80 100644
--- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
@@ -349,6 +349,12 @@ define <vscale x 32 x bfloat> @vp_nearbyint_nxv32bf16(<vscale x 32 x bfloat> %va
 define <vscale x 32 x bfloat> @vp_nearbyint_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv32bf16_unmasked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmset.m v16
@@ -358,35 +364,39 @@ define <vscale x 32 x bfloat> @vp_nearbyint_nxv32bf16_unmasked(<vscale x 32 x bf
 ; CHECK-NEXT:    fmv.w.x fa5, a3
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v7, v16, a2
+; CHECK-NEXT:    vslidedown.vx v16, v16, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
-; CHECK-NEXT:    vmv1r.v v6, v7
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v24, v0.t
+; CHECK-NEXT:    vfabs.v v8, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; CHECK-NEXT:    frflags a2
-; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    fsflags a2
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
 ; CHECK-NEXT:    bltu a0, a1, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB11_2:
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16
 ; CHECK-NEXT:    vmflt.vf v0, v24, fa5
@@ -398,6 +408,12 @@ define <vscale x 32 x bfloat> @vp_nearbyint_nxv32bf16_unmasked(<vscale x 32 x bf
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
 ; CHECK-NEXT:    fsflags a0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    .cfi_def_cfa sp, 16
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.vp.nearbyint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x bfloat> %v
@@ -665,20 +681,18 @@ declare <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half>, <vsc
 define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv8f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a0)
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
-; ZVFH-NEXT:    vmv1r.v v0, v10
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    fsflags a0
 ; ZVFH-NEXT:    ret
 ;
@@ -753,20 +767,18 @@ declare <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half>, <
 define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv16f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a0)
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
-; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    fsflags a0
 ; ZVFH-NEXT:    ret
 ;
@@ -841,20 +853,18 @@ declare <vscale x 32 x half> @llvm.vp.nearbyint.nxv32f16(<vscale x 32 x half>, <
 define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_nearbyint_nxv32f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; ZVFH-NEXT:    frflags a0
-; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    fsflags a0
 ; ZVFH-NEXT:    ret
 ;
@@ -937,6 +947,12 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half>
 ;
 ; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a1
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v16
@@ -946,35 +962,39 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a3
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v7, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
-; ZVFHMIN-NEXT:    vmv1r.v v6, v7
+; ZVFHMIN-NEXT:    vmv1r.v v17, v16
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-NEXT:    addi a3, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v6, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    frflags a2
-; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    fsflags a2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB23_2:
+; ZVFHMIN-NEXT:    addi a1, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v24, v16
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
@@ -986,6 +1006,12 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    fsflags a0
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.nearbyint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x half> %v
@@ -1081,19 +1107,17 @@ define <vscale x 4 x float> @vp_nearbyint_nxv4f32(<vscale x 4 x float> %va, <vsc
 ; CHECK-LABEL: vp_nearbyint_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -1125,19 +1149,17 @@ define <vscale x 8 x float> @vp_nearbyint_nxv8f32(<vscale x 8 x float> %va, <vsc
 ; CHECK-LABEL: vp_nearbyint_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -1169,19 +1191,17 @@ define <vscale x 16 x float> @vp_nearbyint_nxv16f32(<vscale x 16 x float> %va, <
 ; CHECK-LABEL: vp_nearbyint_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
@@ -1254,20 +1274,18 @@ declare <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double>,
 define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv2f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI36_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -1298,20 +1316,18 @@ declare <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double>,
 define <vscale x 4 x double> @vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -1342,20 +1358,18 @@ declare <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double>,
 define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv7f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI40_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -1386,20 +1400,18 @@ declare <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double>,
 define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI42_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    frflags a0
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -1438,17 +1450,15 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va,
 ; CHECK-NEXT:    srli a3, a1, 3
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI44_0)(a2)
 ; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    vslidedown.vx v6, v0, a3
+; CHECK-NEXT:    vslidedown.vx v0, v0, a3
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    frflags a2
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
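(Aside on the nearbyint deltas above, and the rint ones below: they rely on a third spec rule. A masked instruction's destination normally may not overlap the mask source v0, but the restriction is lifted when the destination itself holds a mask value, which is why vmflt.vf can now write its compare result straight into v0 instead of detouring through a vmv1r.v copy. A sketch in the same hypothetical style as above:)

// Illustrative sketch only: destination overlap with the mask register v0.
// Legal when the instruction is unmasked, when the destination group does
// not start at v0, or when the destination is a mask value (e.g. a vmflt
// compare result).
static bool isLegalDestOverlapWithV0(bool Masked, unsigned Dst,
                                     bool DstIsMaskResult) {
  bool CoversV0 = (Dst == 0); // any group starting at v0 covers it
  return !Masked || !CoversV0 || DstIsMaskResult;
}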
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
index 948df1948ccef..1cfcb1d642913 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
@@ -10,17 +10,17 @@ define <vscale x 4 x i8> @foo(ptr %p) {
 ; CHECK-NEXT:    vmv.v.x v8, a0
 ; CHECK-NEXT:    li a0, 248
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vsll.vi v12, v10, 3
+; CHECK-NEXT:    vsll.vi v11, v10, 3
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v12
+; CHECK-NEXT:    vzext.vf2 v10, v11
 ; CHECK-NEXT:    vand.vx v10, v10, a0
 ; CHECK-NEXT:    lui a0, 1
 ; CHECK-NEXT:    addi a0, a0, -361
 ; CHECK-NEXT:    vmacc.vx v8, a0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 15
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %i13 = load <vscale x 4 x i16>, ptr %p, align 2
   %i14 = zext <vscale x 4 x i16> %i13 to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
index ab9849631663c..0acfaf49a1c9b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
@@ -148,8 +148,8 @@ define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscal
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v16, 1
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmsne.vi v16, v8, 0
-; CHECK-NEXT:    vmv.x.s a0, v16
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
+; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    snez a0, a0
 ; CHECK-NEXT:    sb a0, 0(zero)
 ; CHECK-NEXT:    li a0, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
index a9505dca97529..20aef8a1bf282 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
@@ -325,6 +325,12 @@ define <vscale x 32 x bfloat> @vp_rint_nxv32bf16(<vscale x 32 x bfloat> %va, <vs
 define <vscale x 32 x bfloat> @vp_rint_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv32bf16_unmasked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmset.m v16
@@ -334,33 +340,37 @@ define <vscale x 32 x bfloat> @vp_rint_nxv32bf16_unmasked(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    fmv.w.x fa5, a3
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v7, v16, a2
+; CHECK-NEXT:    vslidedown.vx v16, v16, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
-; CHECK-NEXT:    vmv1r.v v6, v7
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v24, v0.t
+; CHECK-NEXT:    vfabs.v v8, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v16, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
 ; CHECK-NEXT:    bltu a0, a1, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB11_2:
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16
 ; CHECK-NEXT:    vmflt.vf v0, v24, fa5
@@ -370,6 +380,12 @@ define <vscale x 32 x bfloat> @vp_rint_nxv32bf16_unmasked(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    .cfi_def_cfa sp, 16
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x bfloat> %v
@@ -613,19 +629,17 @@ declare <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half>, <vscale x
 define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv8f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a0)
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_rint_nxv8f16:
@@ -693,19 +707,17 @@ declare <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half>, <vscal
 define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv16f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a0)
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_rint_nxv16f16:
@@ -773,19 +785,17 @@ declare <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half>, <vscal
 define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_rint_nxv32f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v16
+; ZVFH-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_rint_nxv32f16:
@@ -861,6 +871,12 @@ define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ;
 ; ZVFHMIN-LABEL: vp_rint_nxv32f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a1
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v16
@@ -870,33 +886,37 @@ define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a3
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v7, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
-; ZVFHMIN-NEXT:    vmv1r.v v6, v7
+; ZVFHMIN-NEXT:    vmv1r.v v17, v16
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-NEXT:    addi a3, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v6, v16, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB23_2:
+; ZVFHMIN-NEXT:    addi a1, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v24, v16
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
@@ -906,6 +926,12 @@ define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x half> %v
@@ -993,18 +1019,16 @@ define <vscale x 4 x float> @vp_rint_nxv4f32(<vscale x 4 x float> %va, <vscale x
 ; CHECK-LABEL: vp_rint_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1033,18 +1057,16 @@ define <vscale x 8 x float> @vp_rint_nxv8f32(<vscale x 8 x float> %va, <vscale x
 ; CHECK-LABEL: vp_rint_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1073,18 +1095,16 @@ define <vscale x 16 x float> @vp_rint_nxv16f32(<vscale x 16 x float> %va, <vscal
 ; CHECK-LABEL: vp_rint_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -1150,19 +1170,17 @@ declare <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double>, <vsca
 define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv2f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI36_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -1190,19 +1208,17 @@ declare <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double>, <vsca
 define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -1230,19 +1246,17 @@ declare <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double>, <vsca
 define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv7f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI40_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -1270,19 +1284,17 @@ declare <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double>, <vsca
 define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI42_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1318,16 +1330,14 @@ define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vsc
 ; CHECK-NEXT:    srli a3, a1, 3
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI44_0)(a2)
 ; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    vslidedown.vx v6, v0, a3
+; CHECK-NEXT:    vslidedown.vx v0, v0, a3
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
index ccbc0ebb3b73e..8f0cce09dfd51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
@@ -349,6 +349,12 @@ define <vscale x 32 x bfloat> @vp_round_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 define <vscale x 32 x bfloat> @vp_round_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv32bf16_unmasked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmset.m v16
@@ -358,35 +364,39 @@ define <vscale x 32 x bfloat> @vp_round_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    fmv.w.x fa5, a3
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v7, v16, a2
+; CHECK-NEXT:    vslidedown.vx v16, v16, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
-; CHECK-NEXT:    vmv1r.v v6, v7
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v24, v0.t
+; CHECK-NEXT:    vfabs.v v8, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a2, 4
-; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
 ; CHECK-NEXT:    bltu a0, a1, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB11_2:
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16
 ; CHECK-NEXT:    vmflt.vf v0, v24, fa5
@@ -398,6 +408,12 @@ define <vscale x 32 x bfloat> @vp_round_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    .cfi_def_cfa sp, 16
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.vp.round.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x bfloat> %v
@@ -665,21 +681,19 @@ declare <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half>, <vscale
 define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv8f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a0)
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
-; ZVFH-NEXT:    vmv1r.v v0, v10
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_round_nxv8f16:
@@ -753,21 +767,19 @@ declare <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half>, <vsca
 define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv16f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a0)
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
-; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_round_nxv16f16:
@@ -841,21 +853,19 @@ declare <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half>, <vsca
 define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_nxv32f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 4
-; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_round_nxv32f16:
@@ -937,6 +947,12 @@ define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va
 ;
 ; ZVFHMIN-LABEL: vp_round_nxv32f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a1
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v16
@@ -946,35 +962,39 @@ define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a3
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v7, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
-; ZVFHMIN-NEXT:    vmv1r.v v6, v7
+; ZVFHMIN-NEXT:    vmv1r.v v17, v16
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-NEXT:    addi a3, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v6, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a2, 4
-; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    fsrm a2
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB23_2:
+; ZVFHMIN-NEXT:    addi a1, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v24, v16
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
@@ -986,6 +1006,12 @@ define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x half> %v
@@ -1081,20 +1107,18 @@ define <vscale x 4 x float> @vp_round_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: vp_round_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1125,20 +1149,18 @@ define <vscale x 8 x float> @vp_round_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: vp_round_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1169,20 +1191,18 @@ define <vscale x 16 x float> @vp_round_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: vp_round_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -1254,21 +1274,19 @@ declare <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double>, <vsc
 define <vscale x 2 x double> @vp_round_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv2f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI36_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -1298,21 +1316,19 @@ declare <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double>, <vsc
 define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -1342,21 +1358,19 @@ declare <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double>, <vsc
 define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv7f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI40_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -1386,21 +1400,19 @@ declare <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double>, <vsc
 define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI42_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1438,17 +1450,15 @@ define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vs
 ; CHECK-NEXT:    srli a3, a1, 3
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI44_0)(a2)
 ; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    vslidedown.vx v6, v0, a3
+; CHECK-NEXT:    vslidedown.vx v0, v0, a3
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a2, 4
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
 ; CHECK-NEXT:    fsrm a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
index 3975423e6f985..c2c56403851c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
@@ -349,6 +349,12 @@ define <vscale x 32 x bfloat> @vp_roundeven_nxv32bf16(<vscale x 32 x bfloat> %va
 define <vscale x 32 x bfloat> @vp_roundeven_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv32bf16_unmasked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmset.m v16
@@ -358,35 +364,39 @@ define <vscale x 32 x bfloat> @vp_roundeven_nxv32bf16_unmasked(<vscale x 32 x bf
 ; CHECK-NEXT:    fmv.w.x fa5, a3
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v7, v16, a2
+; CHECK-NEXT:    vslidedown.vx v16, v16, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
-; CHECK-NEXT:    vmv1r.v v6, v7
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v24, v0.t
+; CHECK-NEXT:    vfabs.v v8, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a2, 0
-; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
 ; CHECK-NEXT:    bltu a0, a1, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB11_2:
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16
 ; CHECK-NEXT:    vmflt.vf v0, v24, fa5
@@ -398,6 +408,12 @@ define <vscale x 32 x bfloat> @vp_roundeven_nxv32bf16_unmasked(<vscale x 32 x bf
 ; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    .cfi_def_cfa sp, 16
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.vp.roundeven.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x bfloat> %v
@@ -665,21 +681,19 @@ declare <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half>, <vsc
 define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv8f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a0)
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
-; ZVFH-NEXT:    vmv1r.v v0, v10
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_nxv8f16:
@@ -753,21 +767,19 @@ declare <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half>, <
 define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv16f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a0)
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
-; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_nxv16f16:
@@ -841,21 +853,19 @@ declare <vscale x 32 x half> @llvm.vp.roundeven.nxv32f16(<vscale x 32 x half>, <
 define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_nxv32f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 0
-; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_nxv32f16:
@@ -937,6 +947,12 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half>
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_nxv32f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a1
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v16
@@ -946,35 +962,39 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a3
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v7, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
-; ZVFHMIN-NEXT:    vmv1r.v v6, v7
+; ZVFHMIN-NEXT:    vmv1r.v v17, v16
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-NEXT:    addi a3, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v6, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a2, 0
-; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    fsrm a2
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB23_2:
+; ZVFHMIN-NEXT:    addi a1, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v24, v16
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
@@ -986,6 +1006,12 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.roundeven.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x half> %v
@@ -1081,20 +1107,18 @@ define <vscale x 4 x float> @vp_roundeven_nxv4f32(<vscale x 4 x float> %va, <vsc
 ; CHECK-LABEL: vp_roundeven_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1125,20 +1149,18 @@ define <vscale x 8 x float> @vp_roundeven_nxv8f32(<vscale x 8 x float> %va, <vsc
 ; CHECK-LABEL: vp_roundeven_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1169,20 +1191,18 @@ define <vscale x 16 x float> @vp_roundeven_nxv16f32(<vscale x 16 x float> %va, <
 ; CHECK-LABEL: vp_roundeven_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -1254,21 +1274,19 @@ declare <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double>,
 define <vscale x 2 x double> @vp_roundeven_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv2f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI36_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -1298,21 +1316,19 @@ declare <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double>,
 define <vscale x 4 x double> @vp_roundeven_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -1342,21 +1358,19 @@ declare <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double>,
 define <vscale x 7 x double> @vp_roundeven_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv7f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI40_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -1386,21 +1400,19 @@ declare <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double>,
 define <vscale x 8 x double> @vp_roundeven_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI42_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1438,17 +1450,15 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va,
 ; CHECK-NEXT:    srli a3, a1, 3
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI44_0)(a2)
 ; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    vslidedown.vx v6, v0, a3
+; CHECK-NEXT:    vslidedown.vx v0, v0, a3
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a2, 0
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
 ; CHECK-NEXT:    fsrm a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
index 7f617f48862c4..0ae719c88692c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
@@ -349,6 +349,12 @@ define <vscale x 32 x bfloat> @vp_roundtozero_nxv32bf16(<vscale x 32 x bfloat> %
 define <vscale x 32 x bfloat> @vp_roundtozero_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv32bf16_unmasked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmset.m v16
@@ -358,35 +364,39 @@ define <vscale x 32 x bfloat> @vp_roundtozero_nxv32bf16_unmasked(<vscale x 32 x
 ; CHECK-NEXT:    fmv.w.x fa5, a3
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v7, v16, a2
+; CHECK-NEXT:    vslidedown.vx v16, v16, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
-; CHECK-NEXT:    vmv1r.v v6, v7
+; CHECK-NEXT:    vmv1r.v v17, v16
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfabs.v v16, v24, v0.t
+; CHECK-NEXT:    vfabs.v v8, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a2, 1
-; CHECK-NEXT:    vmv1r.v v0, v6
+; CHECK-NEXT:    vmv1r.v v0, v17
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; CHECK-NEXT:    fsrm a2
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v7
+; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
 ; CHECK-NEXT:    bltu a0, a1, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB11_2:
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16
 ; CHECK-NEXT:    vmflt.vf v0, v24, fa5
@@ -398,6 +408,12 @@ define <vscale x 32 x bfloat> @vp_roundtozero_nxv32bf16_unmasked(<vscale x 32 x
 ; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    .cfi_def_cfa sp, 16
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.vp.roundtozero.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x bfloat> %v
@@ -665,21 +681,19 @@ declare <vscale x 8 x half> @llvm.vp.roundtozero.nxv8f16(<vscale x 8 x half>, <v
 define <vscale x 8 x half> @vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv8f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a0)
-; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v10, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
-; ZVFH-NEXT:    vmv1r.v v0, v10
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv8f16:
@@ -753,21 +767,19 @@ declare <vscale x 16 x half> @llvm.vp.roundtozero.nxv16f16(<vscale x 16 x half>,
 define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv16f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv1r.v v12, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI20_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI20_0)(a0)
-; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
-; ZVFH-NEXT:    vmv1r.v v0, v12
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv16f16:
@@ -841,21 +853,19 @@ declare <vscale x 32 x half> @llvm.vp.roundtozero.nxv32f16(<vscale x 32 x half>,
 define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_nxv32f16:
 ; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a1)
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT:    vmv1r.v v16, v0
-; ZVFH-NEXT:    lui a0, %hi(.LCPI22_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI22_0)(a0)
-; ZVFH-NEXT:    vfabs.v v24, v8, v0.t
+; ZVFH-NEXT:    vfabs.v v16, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; ZVFH-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; ZVFH-NEXT:    fsrmi a0, 1
-; ZVFH-NEXT:    vmv1r.v v0, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; ZVFH-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; ZVFH-NEXT:    fsrm a0
-; ZVFH-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv32f16:
@@ -937,6 +947,12 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x hal
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_nxv32f16_unmasked:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a1
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v16
@@ -946,35 +962,39 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x hal
 ; ZVFHMIN-NEXT:    fmv.w.x fa5, a3
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v7, v16, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v16, v16, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
-; ZVFHMIN-NEXT:    vmv1r.v v6, v7
+; ZVFHMIN-NEXT:    vmv1r.v v17, v16
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
+; ZVFHMIN-NEXT:    addi a3, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfabs.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vmflt.vf v6, v16, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v17, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    fsrmi a2, 1
-; ZVFHMIN-NEXT:    vmv1r.v v0, v6
+; ZVFHMIN-NEXT:    vmv1r.v v0, v17
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    fsrm a2
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB23_2:
+; ZVFHMIN-NEXT:    addi a1, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v24, v16
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
@@ -986,6 +1006,12 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x hal
 ; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.vp.roundtozero.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x half> %v
@@ -1081,20 +1107,18 @@ define <vscale x 4 x float> @vp_roundtozero_nxv4f32(<vscale x 4 x float> %va, <v
 ; CHECK-LABEL: vp_roundtozero_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.roundtozero.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1125,20 +1149,18 @@ define <vscale x 8 x float> @vp_roundtozero_nxv8f32(<vscale x 8 x float> %va, <v
 ; CHECK-LABEL: vp_roundtozero_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.roundtozero.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1169,20 +1191,18 @@ define <vscale x 16 x float> @vp_roundtozero_nxv16f32(<vscale x 16 x float> %va,
 ; CHECK-LABEL: vp_roundtozero_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    lui a0, 307200
 ; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x float> @llvm.vp.roundtozero.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x float> %v
@@ -1254,21 +1274,19 @@ declare <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double>
 define <vscale x 2 x double> @vp_roundtozero_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv2f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI36_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
-; CHECK-NEXT:    vfabs.v v12, v8, v0.t
+; CHECK-NEXT:    vfabs.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v10, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -1298,21 +1316,19 @@ declare <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double>
 define <vscale x 4 x double> @vp_roundtozero_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
-; CHECK-NEXT:    vfabs.v v16, v8, v0.t
+; CHECK-NEXT:    vfabs.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x double> %v
@@ -1342,21 +1358,19 @@ declare <vscale x 7 x double> @llvm.vp.roundtozero.nxv7f64(<vscale x 7 x double>
 define <vscale x 7 x double> @vp_roundtozero_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv7f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI40_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.roundtozero.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x double> %v
@@ -1386,21 +1400,19 @@ declare <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double>
 define <vscale x 8 x double> @vp_roundtozero_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a1)
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v0
-; CHECK-NEXT:    lui a0, %hi(.LCPI42_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
-; CHECK-NEXT:    vfabs.v v24, v8, v0.t
+; CHECK-NEXT:    vfabs.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v16, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 1
-; CHECK-NEXT:    vmv1r.v v0, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
@@ -1438,17 +1450,15 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %v
 ; CHECK-NEXT:    srli a3, a1, 3
 ; CHECK-NEXT:    fld fa5, %lo(.LCPI44_0)(a2)
 ; CHECK-NEXT:    sub a2, a0, a1
-; CHECK-NEXT:    vslidedown.vx v6, v0, a3
+; CHECK-NEXT:    vslidedown.vx v0, v0, a3
 ; CHECK-NEXT:    sltu a3, a0, a2
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    vfabs.v v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v24, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a2, 1
-; CHECK-NEXT:    vmv1r.v v0, v6
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
 ; CHECK-NEXT:    fsrm a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 9972df97ad9f4..7e53a7f8a09cc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -669,8 +669,7 @@ define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3bf16(<vscale x 3 x bfloat> %va, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v12, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfeq.vv v0, v12, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3bf16(<vscale x 3 x bfloat> %va, <vscale x 3 x bfloat> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl)
   ret <vscale x 3 x i1> %v
@@ -685,8 +684,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfeq.vv v0, v16, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -699,8 +697,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -715,8 +712,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, b
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -729,10 +725,9 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -745,8 +740,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfgt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -761,8 +755,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, b
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -775,10 +768,9 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -791,8 +783,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfge.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -807,8 +798,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, b
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfle.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -823,8 +813,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vv v0, v16, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -837,8 +826,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -853,8 +841,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, b
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfgt.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -869,8 +856,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfle.vv v0, v16, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -883,8 +869,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfle.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -899,8 +884,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, b
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfge.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1160,9 +1144,9 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    vmfle.vv v8, v8, v12, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
@@ -1206,9 +1190,9 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v8, v8, v12, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl)
@@ -1254,8 +1238,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v8, v16, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfne.vv v0, v16, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1268,8 +1251,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfne.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1284,8 +1266,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, b
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vf v8, v12, fa5, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmfne.vf v0, v12, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1371,8 +1352,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
 ; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    mv a3, a1
 ; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    add a3, a3, a1
-; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    add a1, a1, a3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
@@ -1380,8 +1359,9 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv8r.v v0, v16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    mv a3, a1
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a3, a3, a1
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, a1, a3
 ; CHECK-NEXT:    add a1, sp, a1
@@ -1407,8 +1387,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
 ; CHECK-NEXT:    slli t0, t0, 1
 ; CHECK-NEXT:    mv t1, t0
 ; CHECK-NEXT:    slli t0, t0, 2
-; CHECK-NEXT:    add t1, t1, t0
-; CHECK-NEXT:    slli t0, t0, 1
 ; CHECK-NEXT:    add t0, t0, t1
 ; CHECK-NEXT:    add t0, sp, t0
 ; CHECK-NEXT:    addi t0, t0, 16
@@ -1416,8 +1394,9 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
 ; CHECK-NEXT:    vslidedown.vx v16, v8, a1
 ; CHECK-NEXT:    vl8re16.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    mv t0, a0
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add t0, t0, a0
 ; CHECK-NEXT:    slli a0, a0, 2
 ; CHECK-NEXT:    add a0, a0, t0
 ; CHECK-NEXT:    add a0, sp, a0
@@ -1448,19 +1427,19 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v5, v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, a6, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v7, v24, v16, v0.t
+; CHECK-NEXT:    vmfeq.vv v8, v24, v8, v0.t
 ; CHECK-NEXT:    bltu a2, a5, .LBB85_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a2, a5
@@ -1470,48 +1449,48 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
 ; CHECK-NEXT:    slli a5, a5, 1
 ; CHECK-NEXT:    mv a6, a5
 ; CHECK-NEXT:    slli a5, a5, 2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    slli a5, a5, 1
 ; CHECK-NEXT:    add a5, a5, a6
 ; CHECK-NEXT:    add a5, sp, a5
 ; CHECK-NEXT:    addi a5, a5, 16
-; CHECK-NEXT:    vl1r.v v8, (a5) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    vl1r.v v10, (a5) # vscale x 8-byte Folded Reload
 ; CHECK-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v0, v8, a3
+; CHECK-NEXT:    vslidedown.vx v0, v10, a3
 ; CHECK-NEXT:    sltu a5, a2, a0
 ; CHECK-NEXT:    addi a5, a5, -1
 ; CHECK-NEXT:    and a0, a5, a0
 ; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    slli a5, a5, 1
 ; CHECK-NEXT:    mv a6, a5
+; CHECK-NEXT:    slli a5, a5, 1
+; CHECK-NEXT:    add a6, a6, a5
 ; CHECK-NEXT:    slli a5, a5, 3
 ; CHECK-NEXT:    add a5, a5, a6
 ; CHECK-NEXT:    add a5, sp, a5
 ; CHECK-NEXT:    addi a5, a5, 16
-; CHECK-NEXT:    vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    mv a5, a0
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add a5, a5, a0
 ; CHECK-NEXT:    slli a0, a0, 2
 ; CHECK-NEXT:    add a0, a0, a5
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v20
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v10, v16, v24, v0.t
-; CHECK-NEXT:    vmv1r.v v9, v7
+; CHECK-NEXT:    vmfeq.vv v9, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v9, v5, a3
+; CHECK-NEXT:    vslideup.vx v8, v5, a3
 ; CHECK-NEXT:    bltu a2, a4, .LBB85_6
 ; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    mv a2, a4
 ; CHECK-NEXT:  .LBB85_6:
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    mv a4, a0
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add a4, a4, a0
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, a0, a4
 ; CHECK-NEXT:    add a0, sp, a0
@@ -1520,31 +1499,22 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add a2, a2, a0
 ; CHECK-NEXT:    slli a0, a0, 2
 ; CHECK-NEXT:    add a0, a0, a2
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    mv a2, a0
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add a2, a2, a0
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    add a0, a0, a2
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vmfeq.vv v0, v16, v24, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v8, v10, a3
+; CHECK-NEXT:    vslideup.vx v0, v9, a3
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vslideup.vx v8, v9, a1
-; CHECK-NEXT:    vmv.v.v v0, v8
+; CHECK-NEXT:    vslideup.vx v0, v8, a1
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    slli a0, a0, 1
@@ -2521,8 +2491,7 @@ define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3f16(<vscale x 3 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3f16(<vscale x 3 x half> %va, <vscale x 3 x half> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl)
   ret <vscale x 3 x i1> %v
@@ -2534,8 +2503,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_oeq_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v10, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_vv_nxv8f16:
@@ -2544,8 +2512,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2555,8 +2522,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_oeq_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfeq.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_vf_nxv8f16:
@@ -2565,8 +2531,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vf v0, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2578,8 +2543,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_oeq_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfeq.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_vf_swap_nxv8f16:
@@ -2588,8 +2552,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfeq.vf v0, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2601,18 +2564,16 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ogt_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2622,8 +2583,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_ogt_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_vf_nxv8f16:
@@ -2632,8 +2592,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfgt.vf v0, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2645,8 +2604,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_ogt_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_vf_swap_nxv8f16:
@@ -2655,8 +2613,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2668,18 +2625,16 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_oge_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2689,8 +2644,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_oge_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_vf_nxv8f16:
@@ -2699,8 +2653,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfge.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfge.vf v0, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2712,8 +2665,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_oge_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_vf_swap_nxv8f16:
@@ -2722,8 +2674,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vf v0, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2735,8 +2686,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_olt_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vv_nxv8f16:
@@ -2745,8 +2695,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2756,8 +2705,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_olt_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_nxv8f16:
@@ -2766,8 +2714,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmflt.vf v0, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2779,8 +2726,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_olt_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_swap_nxv8f16:
@@ -2789,8 +2735,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfgt.vf v0, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2802,8 +2747,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ole_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_vv_nxv8f16:
@@ -2812,8 +2756,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2823,8 +2766,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_ole_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_vf_nxv8f16:
@@ -2833,8 +2775,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfle.vf v0, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2846,8 +2787,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_ole_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_vf_swap_nxv8f16:
@@ -2856,8 +2796,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfge.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfge.vf v0, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2870,8 +2809,8 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmflt.vv v13, v10, v8, v0.t
-; ZVFH-NEXT:    vmor.mm v0, v13, v12
+; ZVFH-NEXT:    vmflt.vv v8, v10, v8, v0.t
+; ZVFH-NEXT:    vmor.mm v0, v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vv_nxv8f16:
@@ -2893,8 +2832,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; ZVFH-NEXT:    vmor.mm v0, v11, v10
+; ZVFH-NEXT:    vmfgt.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT:    vmor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vf_nxv8f16:
@@ -2918,8 +2857,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; ZVFH-NEXT:    vmor.mm v0, v11, v10
+; ZVFH-NEXT:    vmflt.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT:    vmor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vf_swap_nxv8f16:
@@ -2942,9 +2881,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ord_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10, v0.t
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8, v0.t
-; ZVFH-NEXT:    vmand.mm v0, v10, v12
+; ZVFH-NEXT:    vmfeq.vv v10, v10, v10, v0.t
+; ZVFH-NEXT:    vmfeq.vv v8, v8, v8, v0.t
+; ZVFH-NEXT:    vmand.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vv_nxv8f16:
@@ -2969,9 +2908,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vf v12, v10, fa0, v0.t
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8, v0.t
-; ZVFH-NEXT:    vmand.mm v0, v10, v12
+; ZVFH-NEXT:    vmfeq.vf v10, v10, fa0, v0.t
+; ZVFH-NEXT:    vmfeq.vv v8, v8, v8, v0.t
+; ZVFH-NEXT:    vmand.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16:
@@ -2999,9 +2938,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vf v12, v10, fa0, v0.t
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8, v0.t
-; ZVFH-NEXT:    vmand.mm v0, v12, v10
+; ZVFH-NEXT:    vmfeq.vf v10, v10, fa0, v0.t
+; ZVFH-NEXT:    vmfeq.vv v8, v8, v8, v0.t
+; ZVFH-NEXT:    vmand.mm v0, v10, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_swap_nxv8f16:
@@ -3028,8 +2967,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmflt.vv v13, v10, v8, v0.t
-; ZVFH-NEXT:    vmnor.mm v0, v13, v12
+; ZVFH-NEXT:    vmflt.vv v8, v10, v8, v0.t
+; ZVFH-NEXT:    vmnor.mm v0, v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vv_nxv8f16:
@@ -3051,8 +2990,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnor.mm v0, v11, v10
+; ZVFH-NEXT:    vmfgt.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vf_nxv8f16:
@@ -3076,8 +3015,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnor.mm v0, v11, v10
+; ZVFH-NEXT:    vmflt.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vf_swap_nxv8f16:
@@ -3100,8 +3039,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ugt_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmfle.vv v8, v8, v10, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vv_nxv8f16:
@@ -3121,8 +3060,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_ugt_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfle.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vf_nxv8f16:
@@ -3144,8 +3083,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_ugt_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfge.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vf_swap_nxv8f16:
@@ -3167,8 +3106,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_uge_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmflt.vv v8, v8, v10, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vv_nxv8f16:
@@ -3188,8 +3127,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_uge_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmflt.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vf_nxv8f16:
@@ -3211,8 +3150,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_uge_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfgt.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vf_swap_nxv8f16:
@@ -3234,17 +3173,17 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ult_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmfle.vv v8, v10, v8, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmfle.vv v8, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
@@ -3255,8 +3194,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_ult_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfge.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vf_nxv8f16:
@@ -3278,8 +3217,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_ult_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfle.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vf_swap_nxv8f16:
@@ -3301,17 +3240,17 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ule_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmflt.vv v8, v10, v8, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmflt.vv v8, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl)
@@ -3322,8 +3261,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_ule_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfgt.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_nxv8f16:
@@ -3345,8 +3284,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_ule_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmflt.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_swap_nxv8f16:
@@ -3368,8 +3307,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_une_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vv v12, v8, v10, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v12
+; ZVFH-NEXT:    vmfne.vv v0, v8, v10, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_une_vv_nxv8f16:
@@ -3378,8 +3316,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vv v0, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3389,8 +3326,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-LABEL: fcmp_une_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfne.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_une_vf_nxv8f16:
@@ -3399,8 +3335,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vf v0, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -3412,8 +3347,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-LABEL: fcmp_une_vf_swap_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vf v10, v8, fa0, v0.t
-; ZVFH-NEXT:    vmv1r.v v0, v10
+; ZVFH-NEXT:    vmfne.vf v0, v8, fa0, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_une_vf_swap_nxv8f16:
@@ -3422,8 +3356,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vf v8, v12, fa5, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; ZVFHMIN-NEXT:    vmfne.vf v0, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -3435,9 +3368,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_uno_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vv v12, v10, v10, v0.t
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8, v0.t
-; ZVFH-NEXT:    vmor.mm v0, v10, v12
+; ZVFH-NEXT:    vmfne.vv v10, v10, v10, v0.t
+; ZVFH-NEXT:    vmfne.vv v8, v8, v8, v0.t
+; ZVFH-NEXT:    vmor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vv_nxv8f16:
@@ -3462,9 +3395,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFH-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vf v12, v10, fa0, v0.t
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8, v0.t
-; ZVFH-NEXT:    vmor.mm v0, v10, v12
+; ZVFH-NEXT:    vmfne.vf v10, v10, fa0, v0.t
+; ZVFH-NEXT:    vmfne.vv v8, v8, v8, v0.t
+; ZVFH-NEXT:    vmor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16:
@@ -3492,9 +3425,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ; ZVFH-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vf v12, v10, fa0, v0.t
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8, v0.t
-; ZVFH-NEXT:    vmor.mm v0, v12, v10
+; ZVFH-NEXT:    vmfne.vf v10, v10, fa0, v0.t
+; ZVFH-NEXT:    vmfne.vv v8, v8, v8, v0.t
+; ZVFH-NEXT:    vmor.mm v0, v10, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_swap_nxv8f16:
@@ -3544,7 +3477,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFH-NEXT:    vslidedown.vx v0, v0, a1
 ; ZVFH-NEXT:    and a4, a4, a5
 ; ZVFH-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v6, v16, v24, v0.t
+; ZVFH-NEXT:    vmfeq.vv v16, v16, v24, v0.t
 ; ZVFH-NEXT:    bltu a2, a3, .LBB171_2
 ; ZVFH-NEXT:  # %bb.1:
 ; ZVFH-NEXT:    mv a2, a3
@@ -3553,10 +3486,9 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFH-NEXT:    addi a0, sp, 16
 ; ZVFH-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFH-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v16, v24, v8, v0.t
+; ZVFH-NEXT:    vmfeq.vv v0, v24, v8, v0.t
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; ZVFH-NEXT:    vslideup.vx v16, v6, a1
-; ZVFH-NEXT:    vmv.v.v v0, v16
+; ZVFH-NEXT:    vslideup.vx v0, v16, a1
 ; ZVFH-NEXT:    csrr a0, vlenb
 ; ZVFH-NEXT:    slli a0, a0, 3
 ; ZVFH-NEXT:    add sp, sp, a0
@@ -3583,8 +3515,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    slli a1, a1, 1
 ; ZVFHMIN-NEXT:    mv a3, a1
 ; ZVFHMIN-NEXT:    slli a1, a1, 2
-; ZVFHMIN-NEXT:    add a3, a3, a1
-; ZVFHMIN-NEXT:    slli a1, a1, 1
 ; ZVFHMIN-NEXT:    add a1, a1, a3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
@@ -3592,8 +3522,9 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv8r.v v0, v16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 1
 ; ZVFHMIN-NEXT:    mv a3, a1
+; ZVFHMIN-NEXT:    slli a1, a1, 1
+; ZVFHMIN-NEXT:    add a3, a3, a1
 ; ZVFHMIN-NEXT:    slli a1, a1, 3
 ; ZVFHMIN-NEXT:    add a1, a1, a3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
@@ -3619,8 +3550,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    slli t0, t0, 1
 ; ZVFHMIN-NEXT:    mv t1, t0
 ; ZVFHMIN-NEXT:    slli t0, t0, 2
-; ZVFHMIN-NEXT:    add t1, t1, t0
-; ZVFHMIN-NEXT:    slli t0, t0, 1
 ; ZVFHMIN-NEXT:    add t0, t0, t1
 ; ZVFHMIN-NEXT:    add t0, sp, t0
 ; ZVFHMIN-NEXT:    addi t0, t0, 16
@@ -3628,8 +3557,9 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vslidedown.vx v16, v8, a1
 ; ZVFHMIN-NEXT:    vl8re16.v v8, (a0)
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 1
 ; ZVFHMIN-NEXT:    mv t0, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add t0, t0, a0
 ; ZVFHMIN-NEXT:    slli a0, a0, 2
 ; ZVFHMIN-NEXT:    add a0, a0, t0
 ; ZVFHMIN-NEXT:    add a0, sp, a0
@@ -3660,19 +3590,19 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v5, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, a6, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v7, v24, v16, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v8, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    bltu a2, a5, .LBB171_4
 ; ZVFHMIN-NEXT:  # %bb.3:
 ; ZVFHMIN-NEXT:    mv a2, a5
@@ -3682,48 +3612,48 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    slli a5, a5, 1
 ; ZVFHMIN-NEXT:    mv a6, a5
 ; ZVFHMIN-NEXT:    slli a5, a5, 2
-; ZVFHMIN-NEXT:    add a6, a6, a5
-; ZVFHMIN-NEXT:    slli a5, a5, 1
 ; ZVFHMIN-NEXT:    add a5, a5, a6
 ; ZVFHMIN-NEXT:    add a5, sp, a5
 ; ZVFHMIN-NEXT:    addi a5, a5, 16
-; ZVFHMIN-NEXT:    vl1r.v v8, (a5) # vscale x 8-byte Folded Reload
+; ZVFHMIN-NEXT:    vl1r.v v10, (a5) # vscale x 8-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v10, a3
 ; ZVFHMIN-NEXT:    sltu a5, a2, a0
 ; ZVFHMIN-NEXT:    addi a5, a5, -1
 ; ZVFHMIN-NEXT:    and a0, a5, a0
 ; ZVFHMIN-NEXT:    csrr a5, vlenb
-; ZVFHMIN-NEXT:    slli a5, a5, 1
 ; ZVFHMIN-NEXT:    mv a6, a5
+; ZVFHMIN-NEXT:    slli a5, a5, 1
+; ZVFHMIN-NEXT:    add a6, a6, a5
 ; ZVFHMIN-NEXT:    slli a5, a5, 3
 ; ZVFHMIN-NEXT:    add a5, a5, a6
 ; ZVFHMIN-NEXT:    add a5, sp, a5
 ; ZVFHMIN-NEXT:    addi a5, a5, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 1
 ; ZVFHMIN-NEXT:    mv a5, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a5, a5, a0
 ; ZVFHMIN-NEXT:    slli a0, a0, 2
 ; ZVFHMIN-NEXT:    add a0, a0, a5
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v10, v16, v24, v0.t
-; ZVFHMIN-NEXT:    vmv1r.v v9, v7
+; ZVFHMIN-NEXT:    vmfeq.vv v9, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v9, v5, a3
+; ZVFHMIN-NEXT:    vslideup.vx v8, v5, a3
 ; ZVFHMIN-NEXT:    bltu a2, a4, .LBB171_6
 ; ZVFHMIN-NEXT:  # %bb.5:
 ; ZVFHMIN-NEXT:    mv a2, a4
 ; ZVFHMIN-NEXT:  .LBB171_6:
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 1
 ; ZVFHMIN-NEXT:    mv a4, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a4, a4, a0
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, a0, a4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
@@ -3732,31 +3662,22 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 1
 ; ZVFHMIN-NEXT:    mv a2, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a2, a2, a0
 ; ZVFHMIN-NEXT:    slli a0, a0, 2
 ; ZVFHMIN-NEXT:    add a0, a0, a2
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    mv a2, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 2
-; ZVFHMIN-NEXT:    add a2, a2, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a2
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; ZVFHMIN-NEXT:    vmv1r.v v0, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v24, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v8, v10, a3
+; ZVFHMIN-NEXT:    vslideup.vx v0, v9, a3
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v8, v9, a1
-; ZVFHMIN-NEXT:    vmv.v.v v0, v8
+; ZVFHMIN-NEXT:    vslideup.vx v0, v8, a1
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    mv a1, a0
 ; ZVFHMIN-NEXT:    slli a0, a0, 1
@@ -4302,8 +4223,7 @@ define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3f64(<vscale x 3 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_oeq_vv_nxv3f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3f64(<vscale x 3 x double> %va, <vscale x 3 x double> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl)
   ret <vscale x 3 x i1> %v
@@ -4315,8 +4235,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_oeq_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfeq.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4326,8 +4245,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_oeq_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4339,8 +4257,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_oeq_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4352,8 +4269,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ogt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4363,8 +4279,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ogt_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4376,8 +4291,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_ogt_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4389,8 +4303,7 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_oge_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4400,8 +4313,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_oge_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4413,8 +4325,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_oge_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4426,8 +4337,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_olt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4437,8 +4347,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_olt_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4450,8 +4359,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_olt_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4463,8 +4371,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ole_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4474,8 +4381,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ole_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4487,8 +4393,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_ole_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4501,8 +4406,8 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v25, v16, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4513,8 +4418,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4527,8 +4432,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4540,9 +4445,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16, v0.t
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8, v0.t
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vv v16, v16, v16, v0.t
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4554,9 +4459,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0, v0.t
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8, v0.t
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0, v0.t
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4570,9 +4475,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0, v0.t
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8, v0.t
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0, v0.t
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vmand.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4585,8 +4490,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v25, v16, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4597,8 +4502,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4611,8 +4516,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4624,8 +4529,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4635,8 +4540,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ugt_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4648,8 +4553,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_ugt_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4661,8 +4566,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uge_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4672,8 +4577,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_uge_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4685,8 +4590,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_uge_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4698,8 +4603,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ult_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4709,8 +4614,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ult_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4722,8 +4627,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_ult_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4735,8 +4640,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ule_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4746,8 +4651,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ule_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4759,8 +4664,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_ule_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4772,8 +4677,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_une_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmfne.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4783,8 +4687,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_une_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4796,8 +4699,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-LABEL: fcmp_une_vf_swap_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4809,9 +4711,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v16, v16, v0.t
-; CHECK-NEXT:    vmfne.vv v16, v8, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vv v16, v16, v16, v0.t
+; CHECK-NEXT:    vmfne.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -4823,9 +4725,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0, v0.t
-; CHECK-NEXT:    vmfne.vv v16, v8, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0, v0.t
+; CHECK-NEXT:    vmfne.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4839,9 +4741,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0, v0.t
-; CHECK-NEXT:    vmfne.vv v16, v8, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v24, v16
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0, v0.t
+; CHECK-NEXT:    vmfne.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
index ff923efe8eb43..7f1ba5279f084 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
@@ -91,9 +91,9 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v16, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
 ; CHECK-NEXT:    ret
   %vc = fcmp ogt <vscale x 8 x bfloat> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -134,9 +134,9 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v16, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
 ; CHECK-NEXT:    ret
   %vc = fcmp ogt <vscale x 8 x bfloat> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -162,9 +162,9 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v16, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v12
 ; CHECK-NEXT:    ret
   %vc = fcmp oge <vscale x 8 x bfloat> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -205,9 +205,9 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v16, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v12
 ; CHECK-NEXT:    ret
   %vc = fcmp oge <vscale x 8 x bfloat> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -666,9 +666,9 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v16, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v12
 ; CHECK-NEXT:    ret
   %vc = fcmp ugt <vscale x 8 x bfloat> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -740,9 +740,9 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v16, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v12
 ; CHECK-NEXT:    ret
   %vc = fcmp uge <vscale x 8 x bfloat> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -768,9 +768,9 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v16, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %vc = fcmp ult <vscale x 8 x bfloat> %va, %vb
@@ -842,9 +842,9 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12
+; CHECK-NEXT:    vmflt.vv v8, v8, v12
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %vc = fcmp ule <vscale x 8 x bfloat> %va, %vb
@@ -1185,9 +1185,9 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmflt.vv v0, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp ogt <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -1246,9 +1246,9 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmflt.vv v0, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp ogt <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -1286,9 +1286,9 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfle.vv v0, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp oge <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -1347,9 +1347,9 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfle.vv v0, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp oge <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -1583,8 +1583,8 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmflt.vv v12, v8, v10
-; ZVFH-NEXT:    vmflt.vv v13, v10, v8
-; ZVFH-NEXT:    vmor.mm v0, v13, v12
+; ZVFH-NEXT:    vmflt.vv v8, v10, v8
+; ZVFH-NEXT:    vmor.mm v0, v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vv_nxv8f16:
@@ -1606,8 +1606,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmflt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmfgt.vf v11, v8, fa0
-; ZVFH-NEXT:    vmor.mm v0, v11, v10
+; ZVFH-NEXT:    vmfgt.vf v8, v8, fa0
+; ZVFH-NEXT:    vmor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vf_nxv8f16:
@@ -1631,8 +1631,8 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmflt.vf v11, v8, fa0
-; ZVFH-NEXT:    vmor.mm v0, v11, v10
+; ZVFH-NEXT:    vmflt.vf v8, v8, fa0
+; ZVFH-NEXT:    vmor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_one_fv_nxv8f16:
@@ -1695,9 +1695,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ord_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8
-; ZVFH-NEXT:    vmand.mm v0, v10, v12
+; ZVFH-NEXT:    vmfeq.vv v10, v10, v10
+; ZVFH-NEXT:    vmfeq.vv v8, v8, v8
+; ZVFH-NEXT:    vmand.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vv_nxv8f16:
@@ -1721,9 +1721,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
-; ZVFH-NEXT:    vmfeq.vf v12, v10, fa0
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8
-; ZVFH-NEXT:    vmand.mm v0, v10, v12
+; ZVFH-NEXT:    vmfeq.vf v10, v10, fa0
+; ZVFH-NEXT:    vmfeq.vv v8, v8, v8
+; ZVFH-NEXT:    vmand.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16:
@@ -1749,9 +1749,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
-; ZVFH-NEXT:    vmfeq.vf v12, v10, fa0
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8
-; ZVFH-NEXT:    vmand.mm v0, v12, v10
+; ZVFH-NEXT:    vmfeq.vf v10, v10, fa0
+; ZVFH-NEXT:    vmfeq.vv v8, v8, v8
+; ZVFH-NEXT:    vmand.mm v0, v10, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_fv_nxv8f16:
@@ -1776,9 +1776,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v
 ; ZVFH-LABEL: fcmp_ord_vv_nxv8f16_nonans:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfeq.vv v12, v10, v10
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8
-; ZVFH-NEXT:    vmand.mm v0, v10, v12
+; ZVFH-NEXT:    vmfeq.vv v10, v10, v10
+; ZVFH-NEXT:    vmfeq.vv v8, v8, v8
+; ZVFH-NEXT:    vmand.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vv_nxv8f16_nonans:
@@ -1802,9 +1802,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
-; ZVFH-NEXT:    vmfeq.vf v12, v10, fa0
-; ZVFH-NEXT:    vmfeq.vv v10, v8, v8
-; ZVFH-NEXT:    vmand.mm v0, v10, v12
+; ZVFH-NEXT:    vmfeq.vf v10, v10, fa0
+; ZVFH-NEXT:    vmfeq.vv v8, v8, v8
+; ZVFH-NEXT:    vmand.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16_nonans:
@@ -1830,8 +1830,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmflt.vv v12, v8, v10
-; ZVFH-NEXT:    vmflt.vv v13, v10, v8
-; ZVFH-NEXT:    vmnor.mm v0, v13, v12
+; ZVFH-NEXT:    vmflt.vv v8, v10, v8
+; ZVFH-NEXT:    vmnor.mm v0, v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vv_nxv8f16:
@@ -1853,8 +1853,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmflt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmfgt.vf v11, v8, fa0
-; ZVFH-NEXT:    vmnor.mm v0, v11, v10
+; ZVFH-NEXT:    vmfgt.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vf_nxv8f16:
@@ -1878,8 +1878,8 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmflt.vf v11, v8, fa0
-; ZVFH-NEXT:    vmnor.mm v0, v11, v10
+; ZVFH-NEXT:    vmflt.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_fv_nxv8f16:
@@ -1942,8 +1942,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ugt_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vv v12, v8, v10
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmfle.vv v8, v8, v10
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vv_nxv8f16:
@@ -1963,8 +1963,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ugt_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfle.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vf_nxv8f16:
@@ -1986,8 +1986,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ugt_fv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfge.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfge.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_fv_nxv8f16:
@@ -2016,9 +2016,9 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmflt.vv v0, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp ugt <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2049,8 +2049,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_uge_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v8, v10
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmflt.vv v8, v8, v10
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vv_nxv8f16:
@@ -2070,8 +2070,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_uge_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmflt.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vf_nxv8f16:
@@ -2093,8 +2093,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_uge_fv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfgt.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_fv_nxv8f16:
@@ -2123,9 +2123,9 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfle.vv v0, v8, v12
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp uge <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2156,17 +2156,17 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ult_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vv v12, v10, v8
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmfle.vv v8, v10, v8
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12
+; ZVFHMIN-NEXT:    vmfle.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp ult <vscale x 8 x half> %va, %vb
@@ -2177,8 +2177,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ult_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfge.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfge.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vf_nxv8f16:
@@ -2200,8 +2200,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ult_fv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfle.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfle.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_fv_nxv8f16:
@@ -2263,17 +2263,17 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_ule_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vv v12, v10, v8
-; ZVFH-NEXT:    vmnot.m v0, v12
+; ZVFH-NEXT:    vmflt.vv v8, v10, v8
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
+; ZVFHMIN-NEXT:    vmflt.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fcmp ule <vscale x 8 x half> %va, %vb
@@ -2284,8 +2284,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ule_vf_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfgt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmfgt.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_nxv8f16:
@@ -2307,8 +2307,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH-LABEL: fcmp_ule_fv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmflt.vf v10, v8, fa0
-; ZVFH-NEXT:    vmnot.m v0, v10
+; ZVFH-NEXT:    vmflt.vf v8, v8, fa0
+; ZVFH-NEXT:    vmnot.m v0, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_fv_nxv8f16:
@@ -2471,9 +2471,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFH-LABEL: fcmp_uno_vv_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vv v12, v10, v10
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8
-; ZVFH-NEXT:    vmor.mm v0, v10, v12
+; ZVFH-NEXT:    vmfne.vv v10, v10, v10
+; ZVFH-NEXT:    vmfne.vv v8, v8, v8
+; ZVFH-NEXT:    vmor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vv_nxv8f16:
@@ -2497,9 +2497,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
-; ZVFH-NEXT:    vmfne.vf v12, v10, fa0
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8
-; ZVFH-NEXT:    vmor.mm v0, v10, v12
+; ZVFH-NEXT:    vmfne.vf v10, v10, fa0
+; ZVFH-NEXT:    vmfne.vv v8, v8, v8
+; ZVFH-NEXT:    vmor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16:
@@ -2525,9 +2525,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
-; ZVFH-NEXT:    vmfne.vf v12, v10, fa0
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8
-; ZVFH-NEXT:    vmor.mm v0, v12, v10
+; ZVFH-NEXT:    vmfne.vf v10, v10, fa0
+; ZVFH-NEXT:    vmfne.vv v8, v8, v8
+; ZVFH-NEXT:    vmor.mm v0, v10, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_fv_nxv8f16:
@@ -2552,9 +2552,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v
 ; ZVFH-LABEL: fcmp_uno_vv_nxv8f16_nonans:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmfne.vv v12, v10, v10
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8
-; ZVFH-NEXT:    vmor.mm v0, v10, v12
+; ZVFH-NEXT:    vmfne.vv v10, v10, v10
+; ZVFH-NEXT:    vmfne.vv v8, v8, v8
+; ZVFH-NEXT:    vmor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vv_nxv8f16_nonans:
@@ -2578,9 +2578,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v10, fa0
-; ZVFH-NEXT:    vmfne.vf v12, v10, fa0
-; ZVFH-NEXT:    vmfne.vv v10, v8, v8
-; ZVFH-NEXT:    vmor.mm v0, v10, v12
+; ZVFH-NEXT:    vmfne.vf v10, v10, fa0
+; ZVFH-NEXT:    vmfne.vv v8, v8, v8
+; ZVFH-NEXT:    vmor.mm v0, v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16_nonans:
@@ -2886,8 +2886,8 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %vc = fcmp one <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2898,8 +2898,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2912,8 +2912,8 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2947,9 +2947,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v12, v12, v12
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %vc = fcmp ord <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -2960,9 +2960,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2975,9 +2975,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -2989,9 +2989,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f32_nonans(<vscale x 8 x float> %va, <
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f32_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v12, v12, v12
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %vc = fcmp ord <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3002,9 +3002,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f32_nonans(<vscale x 8 x float> %va, f
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3017,8 +3017,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %vc = fcmp ueq <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3029,8 +3029,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3043,8 +3043,8 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3078,8 +3078,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ugt_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %vc = fcmp ugt <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3089,8 +3089,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ugt_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3102,8 +3102,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ugt_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3137,8 +3137,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_uge_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %vc = fcmp uge <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3148,8 +3148,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_uge_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3161,8 +3161,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_uge_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3196,8 +3196,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ult_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %vc = fcmp ult <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3207,8 +3207,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ult_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3220,8 +3220,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ult_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3255,8 +3255,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ule_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %vc = fcmp ule <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3266,8 +3266,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ule_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3279,8 +3279,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK-LABEL: fcmp_ule_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3370,9 +3370,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v12, v12, v12
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %vc = fcmp uno <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3383,9 +3383,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3398,9 +3398,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3412,9 +3412,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f32_nonans(<vscale x 8 x float> %va, <
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f32_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v12, v12, v12
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %vc = fcmp uno <vscale x 8 x float> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3425,9 +3425,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f32_nonans(<vscale x 8 x float> %va, f
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -3720,8 +3720,8 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %vc = fcmp one <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3732,8 +3732,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3746,8 +3746,8 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3781,9 +3781,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vv v16, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %vc = fcmp ord <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3794,9 +3794,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3809,9 +3809,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3823,9 +3823,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64_nonans(<vscale x 8 x double> %va,
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f64_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vv v16, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %vc = fcmp ord <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3836,9 +3836,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64_nonans(<vscale x 8 x double> %va,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3851,8 +3851,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmnor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %vc = fcmp ueq <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3863,8 +3863,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3877,8 +3877,8 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3912,8 +3912,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %vc = fcmp ugt <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3923,8 +3923,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ugt_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3936,8 +3936,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ugt_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3971,8 +3971,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uge_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v8, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %vc = fcmp uge <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -3982,8 +3982,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_uge_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -3995,8 +3995,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_uge_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4030,8 +4030,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ult_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v8, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %vc = fcmp ult <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -4041,8 +4041,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ult_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4054,8 +4054,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ult_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4089,8 +4089,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ule_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %vc = fcmp ule <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -4100,8 +4100,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ule_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4113,8 +4113,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmp_ule_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4204,9 +4204,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v16, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vv v16, v16, v16
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %vc = fcmp uno <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -4217,9 +4217,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4232,9 +4232,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v24, v16
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4246,9 +4246,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64_nonans(<vscale x 8 x double> %va,
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f64_nonans:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v16, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vv v16, v16, v16
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %vc = fcmp uno <vscale x 8 x double> %va, %vb
   ret <vscale x 8 x i1> %vc
@@ -4259,9 +4259,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64_nonans(<vscale x 8 x double> %va,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -4277,11 +4277,11 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
 ; RV32-NEXT:    fcvt.d.w fa5, zero
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vmfeq.vf v24, v16, fa5
+; RV32-NEXT:    vmfeq.vf v16, v16, fa5
 ; RV32-NEXT:    vmfeq.vf v0, v8, fa5
 ; RV32-NEXT:    srli a0, a0, 3
 ; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV32-NEXT:    vslideup.vx v0, v24, a0
+; RV32-NEXT:    vslideup.vx v0, v16, a0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: fcmp_oeq_vf_nx16f64:
@@ -4289,11 +4289,11 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
 ; RV64-NEXT:    fmv.d.x fa5, zero
 ; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT:    vmfeq.vf v24, v16, fa5
+; RV64-NEXT:    vmfeq.vf v16, v16, fa5
 ; RV64-NEXT:    vmfeq.vf v0, v8, fa5
 ; RV64-NEXT:    srli a0, a0, 3
 ; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; RV64-NEXT:    vslideup.vx v0, v24, a0
+; RV64-NEXT:    vslideup.vx v0, v16, a0
 ; RV64-NEXT:    ret
 ;
 ; ZVFHMIN32-LABEL: fcmp_oeq_vf_nx16f64:
@@ -4301,11 +4301,11 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
 ; ZVFHMIN32-NEXT:    fcvt.d.w fa5, zero
 ; ZVFHMIN32-NEXT:    csrr a0, vlenb
 ; ZVFHMIN32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; ZVFHMIN32-NEXT:    vmfeq.vf v24, v16, fa5
+; ZVFHMIN32-NEXT:    vmfeq.vf v16, v16, fa5
 ; ZVFHMIN32-NEXT:    vmfeq.vf v0, v8, fa5
 ; ZVFHMIN32-NEXT:    srli a0, a0, 3
 ; ZVFHMIN32-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; ZVFHMIN32-NEXT:    vslideup.vx v0, v24, a0
+; ZVFHMIN32-NEXT:    vslideup.vx v0, v16, a0
 ; ZVFHMIN32-NEXT:    ret
 ;
 ; ZVFHMIN64-LABEL: fcmp_oeq_vf_nx16f64:
@@ -4313,11 +4313,11 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
 ; ZVFHMIN64-NEXT:    fmv.d.x fa5, zero
 ; ZVFHMIN64-NEXT:    csrr a0, vlenb
 ; ZVFHMIN64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; ZVFHMIN64-NEXT:    vmfeq.vf v24, v16, fa5
+; ZVFHMIN64-NEXT:    vmfeq.vf v16, v16, fa5
 ; ZVFHMIN64-NEXT:    vmfeq.vf v0, v8, fa5
 ; ZVFHMIN64-NEXT:    srli a0, a0, 3
 ; ZVFHMIN64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; ZVFHMIN64-NEXT:    vslideup.vx v0, v24, a0
+; ZVFHMIN64-NEXT:    vslideup.vx v0, v16, a0
 ; ZVFHMIN64-NEXT:    ret
   %vc = fcmp oeq <vscale x 16 x double> %va, zeroinitializer
   ret <vscale x 16 x i1> %vc
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
index 13c63d9c80a9a..d0a9c9b92b4aa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
@@ -1098,7 +1098,7 @@ define <vscale x 128 x i1> @icmp_eq_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a4
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v6, v16, v24, v0.t
+; CHECK-NEXT:    vmseq.vv v16, v16, v24, v0.t
 ; CHECK-NEXT:    bltu a3, a1, .LBB96_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a3, a1
@@ -1107,9 +1107,8 @@ define <vscale x 128 x i1> @icmp_eq_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v16, v24, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmv1r.v v8, v6
+; CHECK-NEXT:    vmseq.vv v0, v24, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v16
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
@@ -1134,16 +1133,15 @@ define <vscale x 128 x i1> @icmp_eq_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b,
 ; CHECK-NEXT:    addi a4, a4, -1
 ; CHECK-NEXT:    and a3, a4, a3
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    bltu a2, a1, .LBB97_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:  .LBB97_2:
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i8 0
   %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
@@ -1164,16 +1162,15 @@ define <vscale x 128 x i1> @icmp_eq_vx_swap_nxv128i8(<vscale x 128 x i8> %va, i8
 ; CHECK-NEXT:    addi a4, a4, -1
 ; CHECK-NEXT:    and a3, a4, a3
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    bltu a2, a1, .LBB98_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:  .LBB98_2:
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i8 0
   %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
@@ -1678,8 +1675,7 @@ define <vscale x 8 x i1> @icmp_eq_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 ; CHECK-LABEL: icmp_eq_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmseq.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmseq.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1689,8 +1685,7 @@ define <vscale x 8 x i1> @icmp_eq_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vs
 ; CHECK-LABEL: icmp_eq_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmseq.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1702,8 +1697,7 @@ define <vscale x 8 x i1> @icmp_eq_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %b
 ; CHECK-LABEL: icmp_eq_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmseq.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1715,8 +1709,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 ; CHECK-LABEL: icmp_eq_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmseq.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"eq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1726,8 +1719,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vscal
 ; CHECK-LABEL: icmp_eq_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmseq.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1737,8 +1729,7 @@ define <vscale x 8 x i1> @icmp_ne_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 ; CHECK-LABEL: icmp_ne_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsne.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsne.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1748,8 +1739,7 @@ define <vscale x 8 x i1> @icmp_ne_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vs
 ; CHECK-LABEL: icmp_ne_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsne.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1761,8 +1751,7 @@ define <vscale x 8 x i1> @icmp_ne_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %b
 ; CHECK-LABEL: icmp_ne_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsne.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1774,8 +1763,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 ; CHECK-LABEL: icmp_ne_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsne.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"ne", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1785,8 +1773,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vscal
 ; CHECK-LABEL: icmp_ne_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsne.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1796,8 +1783,7 @@ define <vscale x 8 x i1> @icmp_ugt_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_ugt_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsltu.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1807,8 +1793,7 @@ define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_ugt_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1820,8 +1805,7 @@ define <vscale x 8 x i1> @icmp_ugt_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_ugt_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1833,8 +1817,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_ugt_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1844,8 +1827,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_ugt_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1855,8 +1837,7 @@ define <vscale x 8 x i1> @icmp_uge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_uge_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsleu.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1866,9 +1847,8 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_uge_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vmsleu.vv v12, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmsleu.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1880,8 +1860,7 @@ define <vscale x 8 x i1> @icmp_uge_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_uge_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1893,8 +1872,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_uge_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1904,8 +1882,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_uge_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1915,8 +1892,7 @@ define <vscale x 8 x i1> @icmp_ult_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_ult_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsltu.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1926,8 +1902,7 @@ define <vscale x 8 x i1> @icmp_ult_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_ult_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1939,8 +1914,7 @@ define <vscale x 8 x i1> @icmp_ult_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_ult_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1952,8 +1926,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_ult_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsleu.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1963,8 +1936,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_ult_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1974,8 +1946,7 @@ define <vscale x 8 x i1> @icmp_sgt_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_sgt_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmslt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmslt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1985,8 +1956,7 @@ define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_sgt_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1998,8 +1968,7 @@ define <vscale x 8 x i1> @icmp_sgt_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_sgt_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2011,8 +1980,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_sgt_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2022,8 +1990,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_sgt_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2033,8 +2000,7 @@ define <vscale x 8 x i1> @icmp_sge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_sge_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2044,9 +2010,8 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_sge_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2058,8 +2023,7 @@ define <vscale x 8 x i1> @icmp_sge_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_sge_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2071,8 +2035,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_sge_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"sge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2082,8 +2045,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_sge_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2093,8 +2055,7 @@ define <vscale x 8 x i1> @icmp_slt_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_slt_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmslt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmslt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2104,8 +2065,7 @@ define <vscale x 8 x i1> @icmp_slt_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_slt_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2117,8 +2077,7 @@ define <vscale x 8 x i1> @icmp_slt_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_slt_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2130,8 +2089,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_slt_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"slt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2141,8 +2099,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_slt_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2152,8 +2109,7 @@ define <vscale x 8 x i1> @icmp_sle_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_sle_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2163,8 +2119,7 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v
 ; CHECK-LABEL: icmp_sle_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vx v12, v8, a0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2176,9 +2131,8 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %
 ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -2190,8 +2144,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x
 ; CHECK-LABEL: icmp_sle_vi_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsle.vi v12, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"sle", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2201,8 +2154,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca
 ; CHECK-LABEL: icmp_sle_vi_swap_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsgt.vi v12, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -2236,7 +2188,7 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale
 ; CHECK-NEXT:    vslidedown.vx v0, v0, a1
 ; CHECK-NEXT:    and a4, a4, a5
 ; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v6, v16, v24, v0.t
+; CHECK-NEXT:    vmseq.vv v16, v16, v24, v0.t
 ; CHECK-NEXT:    bltu a2, a3, .LBB189_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a3
@@ -2245,10 +2197,9 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v16, v24, v8, v0.t
+; CHECK-NEXT:    vmseq.vv v0, v24, v8, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v16, v6, a1
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vslideup.vx v0, v16, a1
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
@@ -2274,17 +2225,16 @@ define <vscale x 32 x i1> @icmp_eq_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b,
 ; CHECK-NEXT:    addi a5, a5, -1
 ; CHECK-NEXT:    and a4, a5, a4
 ; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    bltu a1, a3, .LBB190_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a1, a3
 ; CHECK-NEXT:  .LBB190_2:
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v16, v25, a2
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vslideup.vx v0, v16, a2
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
@@ -2306,17 +2256,16 @@ define <vscale x 32 x i1> @icmp_eq_vx_swap_nxv32i32(<vscale x 32 x i32> %va, i32
 ; CHECK-NEXT:    addi a5, a5, -1
 ; CHECK-NEXT:    and a4, a5, a4
 ; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    bltu a1, a3, .LBB191_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a1, a3
 ; CHECK-NEXT:  .LBB191_2:
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v16, v25, a2
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vslideup.vx v0, v16, a2
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
@@ -3073,8 +3022,7 @@ define <vscale x 8 x i1> @icmp_eq_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; CHECK-LABEL: icmp_eq_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmseq.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmseq.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3089,9 +3037,8 @@ define <vscale x 8 x i1> @icmp_eq_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmseq.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmseq.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3099,8 +3046,7 @@ define <vscale x 8 x i1> @icmp_eq_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
 ; RV64-LABEL: icmp_eq_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmseq.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3117,9 +3063,8 @@ define <vscale x 8 x i1> @icmp_eq_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmseq.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmseq.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3127,8 +3072,7 @@ define <vscale x 8 x i1> @icmp_eq_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b
 ; RV64-LABEL: icmp_eq_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmseq.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmseq.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3140,8 +3084,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; CHECK-LABEL: icmp_eq_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmseq.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"eq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3151,8 +3094,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vscal
 ; CHECK-LABEL: icmp_eq_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmseq.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmseq.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3162,8 +3104,7 @@ define <vscale x 8 x i1> @icmp_ne_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; CHECK-LABEL: icmp_ne_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsne.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsne.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3178,9 +3119,8 @@ define <vscale x 8 x i1> @icmp_ne_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsne.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsne.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3188,8 +3128,7 @@ define <vscale x 8 x i1> @icmp_ne_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
 ; RV64-LABEL: icmp_ne_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsne.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3206,9 +3145,8 @@ define <vscale x 8 x i1> @icmp_ne_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsne.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsne.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3216,8 +3154,7 @@ define <vscale x 8 x i1> @icmp_ne_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b
 ; RV64-LABEL: icmp_ne_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsne.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsne.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3229,8 +3166,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; CHECK-LABEL: icmp_ne_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsne.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"ne", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3240,8 +3176,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vscal
 ; CHECK-LABEL: icmp_ne_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsne.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsne.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3251,8 +3186,7 @@ define <vscale x 8 x i1> @icmp_ugt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_ugt_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsltu.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsltu.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3267,9 +3201,8 @@ define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsltu.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsltu.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3277,8 +3210,7 @@ define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV64-LABEL: icmp_ugt_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsgtu.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3295,9 +3227,8 @@ define <vscale x 8 x i1> @icmp_ugt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsltu.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsltu.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3305,8 +3236,7 @@ define <vscale x 8 x i1> @icmp_ugt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV64-LABEL: icmp_ugt_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsltu.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3318,8 +3248,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_ugt_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3329,8 +3258,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_ugt_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsleu.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3340,8 +3268,7 @@ define <vscale x 8 x i1> @icmp_uge_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_uge_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsleu.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsleu.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3356,9 +3283,8 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsleu.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsleu.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3366,9 +3292,8 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV64-LABEL: icmp_uge_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v24, a0
-; RV64-NEXT:    vmsleu.vv v16, v24, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmsleu.vv v0, v16, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3385,9 +3310,8 @@ define <vscale x 8 x i1> @icmp_uge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsleu.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsleu.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3395,8 +3319,7 @@ define <vscale x 8 x i1> @icmp_uge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV64-LABEL: icmp_uge_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsleu.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsleu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3408,8 +3331,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_uge_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3419,8 +3341,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_uge_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsleu.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsleu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3430,8 +3351,7 @@ define <vscale x 8 x i1> @icmp_ult_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_ult_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsltu.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsltu.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3446,9 +3366,8 @@ define <vscale x 8 x i1> @icmp_ult_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsltu.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsltu.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3456,8 +3375,7 @@ define <vscale x 8 x i1> @icmp_ult_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV64-LABEL: icmp_ult_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsltu.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsltu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3474,9 +3392,8 @@ define <vscale x 8 x i1> @icmp_ult_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsltu.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsltu.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3484,8 +3401,7 @@ define <vscale x 8 x i1> @icmp_ult_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV64-LABEL: icmp_ult_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsgtu.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsgtu.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3497,8 +3413,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_ult_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsleu.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsleu.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3508,8 +3423,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_ult_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgtu.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3519,8 +3433,7 @@ define <vscale x 8 x i1> @icmp_sgt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_sgt_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmslt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmslt.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3535,9 +3448,8 @@ define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmslt.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmslt.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3545,8 +3457,7 @@ define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV64-LABEL: icmp_sgt_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsgt.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3563,9 +3474,8 @@ define <vscale x 8 x i1> @icmp_sgt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmslt.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmslt.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3573,8 +3483,7 @@ define <vscale x 8 x i1> @icmp_sgt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV64-LABEL: icmp_sgt_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmslt.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3586,8 +3495,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_sgt_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgt.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3597,8 +3505,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_sgt_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsle.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3608,8 +3515,7 @@ define <vscale x 8 x i1> @icmp_sge_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_sge_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsle.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3624,9 +3530,8 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsle.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsle.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3634,9 +3539,8 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV64-LABEL: icmp_sge_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v24, a0
-; RV64-NEXT:    vmsle.vv v16, v24, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmsle.vv v0, v16, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3653,9 +3557,8 @@ define <vscale x 8 x i1> @icmp_sge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsle.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsle.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3663,8 +3566,7 @@ define <vscale x 8 x i1> @icmp_sge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV64-LABEL: icmp_sge_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsle.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3676,8 +3578,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_sge_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgt.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"sge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3687,8 +3588,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_sge_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsle.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3698,8 +3598,7 @@ define <vscale x 8 x i1> @icmp_slt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_slt_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmslt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmslt.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3714,9 +3613,8 @@ define <vscale x 8 x i1> @icmp_slt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmslt.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmslt.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3724,8 +3622,7 @@ define <vscale x 8 x i1> @icmp_slt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV64-LABEL: icmp_slt_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmslt.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmslt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3742,9 +3639,8 @@ define <vscale x 8 x i1> @icmp_slt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmslt.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmslt.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3752,8 +3648,7 @@ define <vscale x 8 x i1> @icmp_slt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV64-LABEL: icmp_slt_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsgt.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsgt.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3765,8 +3660,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_slt_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsle.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"slt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3776,8 +3670,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_slt_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgt.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgt.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3787,8 +3680,7 @@ define <vscale x 8 x i1> @icmp_sle_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_sle_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsle.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3803,9 +3695,8 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsle.vv v16, v8, v24, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsle.vv v0, v8, v16, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3813,8 +3704,7 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
 ; RV64-LABEL: icmp_sle_vx_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsle.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsle.vx v0, v8, a0, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3831,9 +3721,8 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsle.vv v16, v24, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsle.vv v0, v16, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
@@ -3841,9 +3730,8 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
 ; RV64-LABEL: icmp_sle_vx_swap_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v24, a0
-; RV64-NEXT:    vmsle.vv v16, v24, v8, v0.t
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmv.v.x v16, a0
+; RV64-NEXT:    vmsle.vv v0, v16, v8, v0.t
 ; RV64-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -3855,8 +3743,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x
 ; CHECK-LABEL: icmp_sle_vi_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsle.vi v16, v8, 4, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsle.vi v0, v8, 4, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"sle", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -3866,8 +3753,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca
 ; CHECK-LABEL: icmp_sle_vi_swap_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsgt.vi v16, v8, 3, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsgt.vi v0, v8, 3, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
index a85b471530cc9..ba186a7c9aafe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
@@ -2999,11 +2999,11 @@ define <vscale x 16 x i1> @icmp_eq_vi_nx16i64(<vscale x 16 x i64> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmseq.vi v24, v16, 0
+; CHECK-NEXT:    vmseq.vi v16, v16, 0
 ; CHECK-NEXT:    srli a0, a0, 3
 ; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v24, a0
+; CHECK-NEXT:    vslideup.vx v0, v16, a0
 ; CHECK-NEXT:    ret
   %vc = icmp eq <vscale x 16 x i64> %va, zeroinitializer
   ret <vscale x 16 x i1> %vc
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index c216fb65a6a5b..85b597f171a63 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -5750,10 +5750,10 @@ define void @sink_splat_vfwadd_vf(ptr nocapture %a, ptr nocapture %b, float %f)
 ; CHECK-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB125_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl1re32.v v10, (a0)
+; CHECK-NEXT:    vl1re32.v v9, (a0)
 ; CHECK-NEXT:    addi a1, a1, 4
 ; CHECK-NEXT:    addi a2, a2, -4
-; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
 ; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    j .LBB125_1
@@ -5823,10 +5823,10 @@ define void @sink_splat_vfwmul_vf(ptr nocapture %a, ptr nocapture %b, float %f)
 ; CHECK-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB127_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl1re32.v v10, (a0)
+; CHECK-NEXT:    vl1re32.v v9, (a0)
 ; CHECK-NEXT:    addi a1, a1, 4
 ; CHECK-NEXT:    addi a2, a2, -4
-; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
 ; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    j .LBB127_1
diff --git a/llvm/test/CodeGen/RISCV/rvv/store-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/store-bf16.ll
index 30cbf9a390319..7e7ccc0899299 100644
--- a/llvm/test/CodeGen/RISCV/rvv/store-bf16.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/store-bf16.ll
@@ -62,8 +62,8 @@ define void @truncstore(<vscale x 4 x float> %v, ptr %p) {
 ; CHECK-LABEL: truncstore:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vs1r.v v10, (a0)
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %w = fptrunc <vscale x 4 x float> %v to <vscale x 4 x bfloat>
   store <vscale x 4 x bfloat> %w, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
index 3ed437eeed2ff..b76986d811532 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
@@ -146,8 +146,8 @@ define void @trunc_sat_i16i32_notopt(ptr %x, ptr %y) {
 ; CHECK-NEXT:    lui a0, 8
 ; CHECK-NEXT:    vmin.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs1r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 32
   %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 -32767))
@@ -162,8 +162,8 @@ define void @trunc_sat_i16i32_maxmin(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v10, v8, 0
-; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
+; CHECK-NEXT:    vs1r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 32
   %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 -32768))
@@ -178,8 +178,8 @@ define void @trunc_sat_i16i32_minmax(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v10, v8, 0
-; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
+; CHECK-NEXT:    vs1r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 32
   %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 32767))
@@ -198,8 +198,8 @@ define void @trunc_sat_u16u32_notopt(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vminu.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs1r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 32
   %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 32767))
@@ -213,8 +213,8 @@ define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
-; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    vs1r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 32
   %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
@@ -231,8 +231,8 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
-; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    vs1r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 16
   %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 1))
@@ -250,8 +250,8 @@ define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
-; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    vs1r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 16
   %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
@@ -274,8 +274,8 @@ define void @trunc_sat_i32i64_notopt(ptr %x, ptr %y) {
 ; CHECK-NEXT:    slli a0, a0, 31
 ; CHECK-NEXT:    vmin.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
-; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs2r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 64
   %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 -2147483647))
@@ -290,8 +290,8 @@ define void @trunc_sat_i32i64_maxmin(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclip.wi v12, v8, 0
-; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
+; CHECK-NEXT:    vs2r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 64
   %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 -2147483648))
@@ -306,8 +306,8 @@ define void @trunc_sat_i32i64_minmax(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclip.wi v12, v8, 0
-; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
+; CHECK-NEXT:    vs2r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 64
   %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 2147483647))
@@ -327,8 +327,8 @@ define void @trunc_sat_u32u64_notopt(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vminu.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
-; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vs2r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 64
   %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 2147483647))
@@ -342,8 +342,8 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 0
-; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    vs2r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 64
   %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
@@ -360,8 +360,8 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 0
-; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    vs2r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 16
   %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 0))
@@ -378,8 +378,8 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 0
-; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    vs2r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 16
   %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
@@ -394,9 +394,9 @@ define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v10, v8, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v10, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -412,9 +412,9 @@ define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v10, v8, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v10, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -430,9 +430,9 @@ define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -449,9 +449,9 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -469,9 +469,9 @@ define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -487,9 +487,9 @@ define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclip.wi v12, v8, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v12, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -507,9 +507,9 @@ define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclip.wi v12, v8, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v12, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -527,9 +527,9 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -548,9 +548,9 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -570,9 +570,9 @@ define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
@@ -590,9 +590,9 @@ define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclip.wi v12, v8, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v12, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vs1r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 32
@@ -608,9 +608,9 @@ define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclip.wi v12, v8, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v8, v12, 0
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vs1r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 32
@@ -626,9 +626,9 @@ define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vs1r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 32
@@ -646,9 +646,9 @@ define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vs1r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -667,9 +667,9 @@ define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vs1r.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll
index d2f73826e4e9e..fe928e10c409a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll
@@ -42,9 +42,9 @@ define <4 x i8> @test_v4i64_v4i8(<4 x i64> %x) {
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -78,9 +78,9 @@ define <4 x i16> @test_v4i64_v4i16(<4 x i64> %x) {
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %a = icmp sgt <4 x i64> %x, zeroinitializer
   %b = sext <4 x i1> %a to <4 x i64>
@@ -94,9 +94,9 @@ define <4 x i32> @test_v4i64_v4i32(<4 x i64> %x) {
 ; CHECK-LABEL: test_v4i64_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v10, v8, zero
+; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %a = icmp sgt <4 x i64> %x, zeroinitializer
   %b = sext <4 x i1> %a to <4 x i64>
@@ -128,9 +128,9 @@ define <vscale x 4 x i8> @test_nxv4i32_nxv4i8(<vscale x 4 x i32> %x) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %a = icmp sgt <vscale x 4 x i32> %x, zeroinitializer
   %b = sext <vscale x 4 x i1> %a to <vscale x 4 x i32>
@@ -146,9 +146,9 @@ define <vscale x 4 x i8> @test_nxv4i64_nxv4i8(<vscale x 4 x i64> %x) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -164,9 +164,9 @@ define <vscale x 4 x i16> @test_nxv4i32_nxv4i16(<vscale x 4 x i32> %x) {
 ; CHECK-LABEL: test_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v10, v8, zero
+; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %a = icmp sgt <vscale x 4 x i32> %x, zeroinitializer
   %b = sext <vscale x 4 x i1> %a to <vscale x 4 x i32>
@@ -182,9 +182,9 @@ define <vscale x 4 x i16> @test_nxv4i64_nxv4i16(<vscale x 4 x i64> %x) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %a = icmp sgt <vscale x 4 x i64> %x, zeroinitializer
   %b = sext <vscale x 4 x i1> %a to <vscale x 4 x i64>
@@ -198,9 +198,9 @@ define <vscale x 4 x i32> @test_nxv4i64_nxv4i32(<vscale x 4 x i64> %x) {
 ; CHECK-LABEL: test_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v12, v8, zero
+; CHECK-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %a = icmp sgt <vscale x 4 x i64> %x, zeroinitializer
   %b = sext <vscale x 4 x i1> %a to <vscale x 4 x i64>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
index 7954d025a6fba..3f50247afa41e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
@@ -262,9 +262,9 @@ define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_ceil_lshr2(<vscale x 8 x i8> %x, <vsc
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v10, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vadd.vi v10, v10, 2
+; CHECK-NEXT:    vadd.vi v8, v10, 2
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 2
+; CHECK-NEXT:    vnsrl.wi v8, v8, 2
 ; CHECK-NEXT:    ret
   %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16>
   %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
@@ -281,9 +281,9 @@ define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_ceil_add2(<vscale x 8 x i8> %x, <vsca
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v10, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vadd.vi v10, v10, 2
+; CHECK-NEXT:    vadd.vi v8, v10, 2
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 2
+; CHECK-NEXT:    vnsrl.wi v8, v8, 2
 ; CHECK-NEXT:    ret
   %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16>
   %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index 4753ab915bdf3..1c765dc795948 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -63,8 +63,7 @@ define {<4 x i32>, <4 x i32>} @vector_deinterleave_v4i32_vv8i32(<8 x i32> %vec)
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vnsrl.wx v10, v8, a0
-; CHECK-NEXT:    vnsrl.wi v11, v8, 0
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vmv.v.v v9, v10
 ; CHECK-NEXT:    ret
 %retval = call {<4 x i32>, <4 x i32>} @llvm.vector.deinterleave2.v8i32(<8 x i32> %vec)
@@ -449,8 +448,7 @@ define {<4 x float>, <4 x float>} @vector_deinterleave_v4f32_v8f32(<8 x float> %
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vnsrl.wx v10, v8, a0
-; CHECK-NEXT:    vnsrl.wi v11, v8, 0
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vmv.v.v v9, v10
 ; CHECK-NEXT:    ret
 %retval = call {<4 x float>, <4 x float>} @llvm.vector.deinterleave2.v8f32(<8 x float> %vec)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index 9344c52098684..a95e44222ab97 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -21,9 +21,9 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_load_nxv16i
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vmerge.vim v8, v12, 1, v0
 ; CHECK-NEXT:    vnsrl.wi v12, v8, 0
-; CHECK-NEXT:    vnsrl.wi v14, v8, 8
+; CHECK-NEXT:    vnsrl.wi v8, v8, 8
 ; CHECK-NEXT:    vmsne.vi v0, v12, 0
-; CHECK-NEXT:    vmsne.vi v8, v14, 0
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
 ; CHECK-NEXT:    ret
   %vec = load <vscale x 32 x i1>, ptr %p
   %deinterleaved.results = call {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index 6144f916ea52b..18a67cdca2a83 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -18,9 +18,9 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv
 ; V-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; V-NEXT:    vmerge.vim v10, v10, 1, v0
 ; V-NEXT:    vnsrl.wi v12, v8, 0
-; V-NEXT:    vnsrl.wi v14, v8, 8
+; V-NEXT:    vnsrl.wi v8, v8, 8
 ; V-NEXT:    vmsne.vi v0, v12, 0
-; V-NEXT:    vmsne.vi v8, v14, 0
+; V-NEXT:    vmsne.vi v8, v8, 0
 ; V-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_deinterleave_nxv16i1_nxv32i1:
@@ -93,8 +93,7 @@ define {<vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxv
 ; V-NEXT:    li a0, 32
 ; V-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; V-NEXT:    vnsrl.wx v12, v8, a0
-; V-NEXT:    vnsrl.wi v14, v8, 0
-; V-NEXT:    vmv.v.v v8, v14
+; V-NEXT:    vnsrl.wi v8, v8, 0
 ; V-NEXT:    vmv.v.v v10, v12
 ; V-NEXT:    ret
 ;
@@ -175,13 +174,12 @@ define {<vscale x 64 x i1>, <vscale x 64 x i1>} @vector_deinterleave_nxv64i1_nxv
 ; V-NEXT:    vmerge.vim v24, v24, 1, v0
 ; V-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; V-NEXT:    vnsrl.wi v8, v16, 0
-; V-NEXT:    vnsrl.wi v0, v16, 8
+; V-NEXT:    vnsrl.wi v16, v16, 8
 ; V-NEXT:    vnsrl.wi v12, v24, 0
-; V-NEXT:    vnsrl.wi v4, v24, 8
+; V-NEXT:    vnsrl.wi v20, v24, 8
 ; V-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; V-NEXT:    vmsne.vi v16, v8, 0
-; V-NEXT:    vmsne.vi v8, v0, 0
-; V-NEXT:    vmv1r.v v0, v16
+; V-NEXT:    vmsne.vi v0, v8, 0
+; V-NEXT:    vmsne.vi v8, v16, 0
 ; V-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_deinterleave_nxv64i1_nxv128i1:
@@ -194,14 +192,13 @@ define {<vscale x 64 x i1>, <vscale x 64 x i1>} @vector_deinterleave_nxv64i1_nxv
 ; ZIP-NEXT:    vmv1r.v v0, v9
 ; ZIP-NEXT:    vmerge.vim v24, v24, 1, v0
 ; ZIP-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZIP-NEXT:    ri.vunzip2a.vv v12, v16, v20
-; ZIP-NEXT:    ri.vunzip2b.vv v4, v16, v20
-; ZIP-NEXT:    ri.vunzip2a.vv v8, v24, v28
-; ZIP-NEXT:    ri.vunzip2b.vv v0, v24, v28
+; ZIP-NEXT:    ri.vunzip2a.vv v4, v16, v20
+; ZIP-NEXT:    ri.vunzip2b.vv v12, v16, v20
+; ZIP-NEXT:    ri.vunzip2a.vv v0, v24, v28
+; ZIP-NEXT:    ri.vunzip2b.vv v8, v24, v28
 ; ZIP-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; ZIP-NEXT:    vmsne.vi v16, v8, 0
-; ZIP-NEXT:    vmsne.vi v8, v0, 0
-; ZIP-NEXT:    vmv1r.v v0, v16
+; ZIP-NEXT:    vmsne.vi v0, v0, 0
+; ZIP-NEXT:    vmsne.vi v8, v8, 0
 ; ZIP-NEXT:    ret
 %retval = call {<vscale x 64 x i1>, <vscale x 64 x i1>} @llvm.vector.deinterleave2.nxv128i1(<vscale x 128 x i1> %vec)
 ret {<vscale x 64 x i1>, <vscale x 64 x i1>} %retval
@@ -213,10 +210,10 @@ define {<vscale x 64 x i8>, <vscale x 64 x i8>} @vector_deinterleave_nxv64i8_nxv
 ; V-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; V-NEXT:    vmv8r.v v24, v8
 ; V-NEXT:    vnsrl.wi v8, v24, 0
-; V-NEXT:    vnsrl.wi v0, v24, 8
+; V-NEXT:    vnsrl.wi v24, v24, 8
 ; V-NEXT:    vnsrl.wi v12, v16, 0
-; V-NEXT:    vnsrl.wi v4, v16, 8
-; V-NEXT:    vmv8r.v v16, v0
+; V-NEXT:    vnsrl.wi v28, v16, 8
+; V-NEXT:    vmv8r.v v16, v24
 ; V-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_deinterleave_nxv64i8_nxv128i8:
@@ -239,10 +236,10 @@ define {<vscale x 32 x i16>, <vscale x 32 x i16>} @vector_deinterleave_nxv32i16_
 ; V-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; V-NEXT:    vmv8r.v v24, v8
 ; V-NEXT:    vnsrl.wi v8, v24, 0
-; V-NEXT:    vnsrl.wi v0, v24, 16
+; V-NEXT:    vnsrl.wi v24, v24, 16
 ; V-NEXT:    vnsrl.wi v12, v16, 0
-; V-NEXT:    vnsrl.wi v4, v16, 16
-; V-NEXT:    vmv8r.v v16, v0
+; V-NEXT:    vnsrl.wi v28, v16, 16
+; V-NEXT:    vmv8r.v v16, v24
 ; V-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_deinterleave_nxv32i16_nxv64i16:
@@ -267,9 +264,8 @@ define {<vscale x 16 x i32>, <vscale x 16 x i32>} @vector_deinterleave_nxv16i32_
 ; V-NEXT:    li a0, 32
 ; V-NEXT:    vnsrl.wx v20, v24, a0
 ; V-NEXT:    vnsrl.wx v16, v8, a0
-; V-NEXT:    vnsrl.wi v0, v8, 0
-; V-NEXT:    vnsrl.wi v4, v24, 0
-; V-NEXT:    vmv8r.v v8, v0
+; V-NEXT:    vnsrl.wi v8, v8, 0
+; V-NEXT:    vnsrl.wi v12, v24, 0
 ; V-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_deinterleave_nxv16i32_nxvv32i32:
@@ -863,24 +859,21 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
 ; CHECK-NEXT:    vmerge.vim v28, v10, 1, v0
 ; CHECK-NEXT:    vs8r.v v24, (a0)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg6e8.v v16, (a2)
-; CHECK-NEXT:    vlseg6e8.v v10, (a0)
-; CHECK-NEXT:    vmv2r.v v8, v16
-; CHECK-NEXT:    vmv2r.v v22, v18
-; CHECK-NEXT:    vmv2r.v v24, v20
-; CHECK-NEXT:    vmv1r.v v9, v10
+; CHECK-NEXT:    vlseg6e8.v v14, (a2)
+; CHECK-NEXT:    vlseg6e8.v v8, (a0)
+; CHECK-NEXT:    vmv2r.v v20, v14
+; CHECK-NEXT:    vmv2r.v v22, v16
+; CHECK-NEXT:    vmv2r.v v24, v18
+; CHECK-NEXT:    vmv1r.v v21, v8
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    vmv1r.v v10, v17
-; CHECK-NEXT:    vmsne.vi v8, v10, 0
-; CHECK-NEXT:    vmv1r.v v23, v12
+; CHECK-NEXT:    vmsne.vi v0, v20, 0
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
+; CHECK-NEXT:    vmv1r.v v23, v10
 ; CHECK-NEXT:    vmsne.vi v9, v22, 0
-; CHECK-NEXT:    vmv1r.v v12, v19
-; CHECK-NEXT:    vmsne.vi v10, v12, 0
-; CHECK-NEXT:    vmv1r.v v25, v14
+; CHECK-NEXT:    vmsne.vi v10, v10, 0
+; CHECK-NEXT:    vmv1r.v v25, v12
 ; CHECK-NEXT:    vmsne.vi v11, v24, 0
-; CHECK-NEXT:    vmv1r.v v14, v21
-; CHECK-NEXT:    vmsne.vi v12, v14, 0
+; CHECK-NEXT:    vmsne.vi v12, v12, 0
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
@@ -1374,29 +1367,25 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
 ; CHECK-NEXT:    vmerge.vim v26, v10, 1, v0
 ; CHECK-NEXT:    vs8r.v v24, (a0)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg8e8.v v18, (a2)
-; CHECK-NEXT:    vlseg8e8.v v10, (a0)
-; CHECK-NEXT:    vmv2r.v v8, v18
-; CHECK-NEXT:    vmv2r.v v26, v20
-; CHECK-NEXT:    vmv2r.v v28, v22
-; CHECK-NEXT:    vmv2r.v v30, v24
-; CHECK-NEXT:    vmv1r.v v9, v10
+; CHECK-NEXT:    vlseg8e8.v v16, (a2)
+; CHECK-NEXT:    vlseg8e8.v v8, (a0)
+; CHECK-NEXT:    vmv2r.v v24, v16
+; CHECK-NEXT:    vmv2r.v v26, v18
+; CHECK-NEXT:    vmv2r.v v28, v20
+; CHECK-NEXT:    vmv2r.v v30, v22
+; CHECK-NEXT:    vmv1r.v v25, v8
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    vmv1r.v v10, v19
-; CHECK-NEXT:    vmsne.vi v8, v10, 0
-; CHECK-NEXT:    vmv1r.v v27, v12
+; CHECK-NEXT:    vmsne.vi v0, v24, 0
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
+; CHECK-NEXT:    vmv1r.v v27, v10
 ; CHECK-NEXT:    vmsne.vi v9, v26, 0
-; CHECK-NEXT:    vmv1r.v v12, v21
-; CHECK-NEXT:    vmsne.vi v10, v12, 0
-; CHECK-NEXT:    vmv1r.v v29, v14
+; CHECK-NEXT:    vmsne.vi v10, v10, 0
+; CHECK-NEXT:    vmv1r.v v29, v12
 ; CHECK-NEXT:    vmsne.vi v11, v28, 0
-; CHECK-NEXT:    vmv1r.v v14, v23
-; CHECK-NEXT:    vmsne.vi v12, v14, 0
-; CHECK-NEXT:    vmv1r.v v31, v16
+; CHECK-NEXT:    vmsne.vi v12, v12, 0
+; CHECK-NEXT:    vmv1r.v v31, v14
 ; CHECK-NEXT:    vmsne.vi v13, v30, 0
-; CHECK-NEXT:    vmv1r.v v16, v25
-; CHECK-NEXT:    vmsne.vi v14, v16, 0
+; CHECK-NEXT:    vmsne.vi v14, v14, 0
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
@@ -1666,8 +1655,7 @@ define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32
 ; V-NEXT:    li a0, 32
 ; V-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; V-NEXT:    vnsrl.wx v10, v8, a0
-; V-NEXT:    vnsrl.wi v11, v8, 0
-; V-NEXT:    vmv.v.v v8, v11
+; V-NEXT:    vnsrl.wi v8, v8, 0
 ; V-NEXT:    vmv.v.v v9, v10
 ; V-NEXT:    ret
 ;
@@ -1733,8 +1721,7 @@ define {<vscale x 4 x float>, <vscale x 4 x float>} @vector_deinterleave_nxv4f32
 ; V-NEXT:    li a0, 32
 ; V-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; V-NEXT:    vnsrl.wx v12, v8, a0
-; V-NEXT:    vnsrl.wi v14, v8, 0
-; V-NEXT:    vmv.v.v v8, v14
+; V-NEXT:    vnsrl.wi v8, v8, 0
 ; V-NEXT:    vmv.v.v v10, v12
 ; V-NEXT:    ret
 ;
@@ -1784,10 +1771,10 @@ define {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} @vector_deinterleave_nxv
 ; V-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; V-NEXT:    vmv8r.v v24, v8
 ; V-NEXT:    vnsrl.wi v8, v24, 0
-; V-NEXT:    vnsrl.wi v0, v24, 16
+; V-NEXT:    vnsrl.wi v24, v24, 16
 ; V-NEXT:    vnsrl.wi v12, v16, 0
-; V-NEXT:    vnsrl.wi v4, v16, 16
-; V-NEXT:    vmv8r.v v16, v0
+; V-NEXT:    vnsrl.wi v28, v16, 16
+; V-NEXT:    vmv8r.v v16, v24
 ; V-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_deinterleave_nxv32bf16_nxv64bf16:
@@ -1810,10 +1797,10 @@ define {<vscale x 32 x half>, <vscale x 32 x half>} @vector_deinterleave_nxv32f1
 ; V-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; V-NEXT:    vmv8r.v v24, v8
 ; V-NEXT:    vnsrl.wi v8, v24, 0
-; V-NEXT:    vnsrl.wi v0, v24, 16
+; V-NEXT:    vnsrl.wi v24, v24, 16
 ; V-NEXT:    vnsrl.wi v12, v16, 0
-; V-NEXT:    vnsrl.wi v4, v16, 16
-; V-NEXT:    vmv8r.v v16, v0
+; V-NEXT:    vnsrl.wi v28, v16, 16
+; V-NEXT:    vmv8r.v v16, v24
 ; V-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_deinterleave_nxv32f16_nxv64f16:
@@ -1838,9 +1825,8 @@ define {<vscale x 16 x float>, <vscale x 16 x float>} @vector_deinterleave_nxv16
 ; V-NEXT:    li a0, 32
 ; V-NEXT:    vnsrl.wx v20, v24, a0
 ; V-NEXT:    vnsrl.wx v16, v8, a0
-; V-NEXT:    vnsrl.wi v0, v8, 0
-; V-NEXT:    vnsrl.wi v4, v24, 0
-; V-NEXT:    vmv8r.v v8, v0
+; V-NEXT:    vnsrl.wi v8, v8, 0
+; V-NEXT:    vnsrl.wi v12, v24, 0
 ; V-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_deinterleave_nxv16f32_nxv32f32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index 3dc83d50ee3f3..c74363019bfd0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -67,8 +67,8 @@ define <16 x i16> @vector_interleave_v16i16_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwaddu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwaddu.vv v8, v9, v10
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    vwmaccu.vx v8, a0, v10
 ; CHECK-NEXT:    ret
@@ -76,10 +76,9 @@ define <16 x i16> @vector_interleave_v16i16_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; ZVBB-LABEL: vector_interleave_v16i16_v8i16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVBB-NEXT:    vmv1r.v v10, v9
-; ZVBB-NEXT:    vmv1r.v v11, v8
-; ZVBB-NEXT:    vwsll.vi v8, v10, 16
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v11
+; ZVBB-NEXT:    vmv1r.v v10, v8
+; ZVBB-NEXT:    vwsll.vi v8, v9, 16
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v10
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_v16i16_v8i16:
@@ -98,8 +97,8 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwaddu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwaddu.vv v8, v9, v10
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    vwmaccu.vx v8, a0, v10
 ; CHECK-NEXT:    ret
@@ -107,11 +106,10 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; ZVBB-LABEL: vector_interleave_v8i32_v4i32:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; ZVBB-NEXT:    vmv1r.v v10, v9
-; ZVBB-NEXT:    vmv1r.v v11, v8
+; ZVBB-NEXT:    vmv1r.v v10, v8
 ; ZVBB-NEXT:    li a0, 32
-; ZVBB-NEXT:    vwsll.vx v8, v10, a0
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v11
+; ZVBB-NEXT:    vwsll.vx v8, v9, a0
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v10
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_v8i32_v4i32:
@@ -892,8 +890,8 @@ define <16 x half> @vector_interleave_v16f16_v8f16(<8 x half> %a, <8 x half> %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwaddu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwaddu.vv v8, v9, v10
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    vwmaccu.vx v8, a0, v10
 ; CHECK-NEXT:    ret
@@ -901,10 +899,9 @@ define <16 x half> @vector_interleave_v16f16_v8f16(<8 x half> %a, <8 x half> %b)
 ; ZVBB-LABEL: vector_interleave_v16f16_v8f16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVBB-NEXT:    vmv1r.v v10, v9
-; ZVBB-NEXT:    vmv1r.v v11, v8
-; ZVBB-NEXT:    vwsll.vi v8, v10, 16
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v11
+; ZVBB-NEXT:    vmv1r.v v10, v8
+; ZVBB-NEXT:    vwsll.vi v8, v9, 16
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v10
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_v16f16_v8f16:
@@ -923,8 +920,8 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwaddu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwaddu.vv v8, v9, v10
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    vwmaccu.vx v8, a0, v10
 ; CHECK-NEXT:    ret
@@ -932,11 +929,10 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b
 ; ZVBB-LABEL: vector_interleave_v8f32_v4f32:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; ZVBB-NEXT:    vmv1r.v v10, v9
-; ZVBB-NEXT:    vmv1r.v v11, v8
+; ZVBB-NEXT:    vmv1r.v v10, v8
 ; ZVBB-NEXT:    li a0, 32
-; ZVBB-NEXT:    vwsll.vx v8, v10, a0
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v11
+; ZVBB-NEXT:    vwsll.vx v8, v9, a0
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v10
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_v8f32_v4f32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index a5811e697634a..94555f59ddc38 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -16,17 +16,17 @@ define void @vector_interleave_store_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vsc
 ; CHECK-NEXT:    li a1, -1
 ; CHECK-NEXT:    vmerge.vim v12, v10, 1, v0
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmerge.vim v14, v10, 1, v0
-; CHECK-NEXT:    vwaddu.vv v8, v14, v12
+; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT:    vwaddu.vv v8, v10, v12
 ; CHECK-NEXT:    vwmaccu.vx v8, a1, v12
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    vmsne.vi v12, v10, 0
-; CHECK-NEXT:    vmsne.vi v10, v8, 0
+; CHECK-NEXT:    vmsne.vi v10, v10, 0
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
 ; CHECK-NEXT:    srli a1, a1, 2
 ; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v10, v12, a1
+; CHECK-NEXT:    vslideup.vx v8, v10, a1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vsm.v v10, (a0)
+; CHECK-NEXT:    vsm.v v8, (a0)
 ; CHECK-NEXT:    ret
   %res = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
   store <vscale x 32 x i1> %res, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 01cc5c58b24ce..2a798c9b44ee4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -19,15 +19,15 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
 ; V-NEXT:    li a0, -1
 ; V-NEXT:    vmerge.vim v12, v10, 1, v0
 ; V-NEXT:    vmv1r.v v0, v9
-; V-NEXT:    vmerge.vim v14, v10, 1, v0
-; V-NEXT:    vwaddu.vv v8, v14, v12
+; V-NEXT:    vmerge.vim v10, v10, 1, v0
+; V-NEXT:    vwaddu.vv v8, v10, v12
 ; V-NEXT:    vwmaccu.vx v8, a0, v12
 ; V-NEXT:    csrr a0, vlenb
-; V-NEXT:    vmsne.vi v12, v10, 0
+; V-NEXT:    vmsne.vi v10, v10, 0
 ; V-NEXT:    vmsne.vi v0, v8, 0
 ; V-NEXT:    srli a0, a0, 2
 ; V-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; V-NEXT:    vslideup.vx v0, v12, a0
+; V-NEXT:    vslideup.vx v0, v10, a0
 ; V-NEXT:    ret
 ;
 ; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1:
@@ -76,8 +76,8 @@ define <vscale x 32 x i8> @vector_interleave_nxv32i8_nxv16i8(<vscale x 16 x i8>
 ; V:       # %bb.0:
 ; V-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; V-NEXT:    vmv2r.v v12, v10
-; V-NEXT:    vmv2r.v v14, v8
-; V-NEXT:    vwaddu.vv v8, v14, v12
+; V-NEXT:    vmv2r.v v10, v8
+; V-NEXT:    vwaddu.vv v8, v10, v12
 ; V-NEXT:    li a0, -1
 ; V-NEXT:    vwmaccu.vx v8, a0, v12
 ; V-NEXT:    ret
@@ -85,10 +85,9 @@ define <vscale x 32 x i8> @vector_interleave_nxv32i8_nxv16i8(<vscale x 16 x i8>
 ; ZVBB-LABEL: vector_interleave_nxv32i8_nxv16i8:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; ZVBB-NEXT:    vmv2r.v v12, v10
-; ZVBB-NEXT:    vmv2r.v v14, v8
-; ZVBB-NEXT:    vwsll.vi v8, v12, 8
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v14
+; ZVBB-NEXT:    vmv2r.v v12, v8
+; ZVBB-NEXT:    vwsll.vi v8, v10, 8
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v12
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv32i8_nxv16i8:
@@ -108,8 +107,8 @@ define <vscale x 16 x i16> @vector_interleave_nxv16i16_nxv8i16(<vscale x 8 x i16
 ; V:       # %bb.0:
 ; V-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; V-NEXT:    vmv2r.v v12, v10
-; V-NEXT:    vmv2r.v v14, v8
-; V-NEXT:    vwaddu.vv v8, v14, v12
+; V-NEXT:    vmv2r.v v10, v8
+; V-NEXT:    vwaddu.vv v8, v10, v12
 ; V-NEXT:    li a0, -1
 ; V-NEXT:    vwmaccu.vx v8, a0, v12
 ; V-NEXT:    ret
@@ -117,10 +116,9 @@ define <vscale x 16 x i16> @vector_interleave_nxv16i16_nxv8i16(<vscale x 8 x i16
 ; ZVBB-LABEL: vector_interleave_nxv16i16_nxv8i16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVBB-NEXT:    vmv2r.v v12, v10
-; ZVBB-NEXT:    vmv2r.v v14, v8
-; ZVBB-NEXT:    vwsll.vi v8, v12, 16
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v14
+; ZVBB-NEXT:    vmv2r.v v12, v8
+; ZVBB-NEXT:    vwsll.vi v8, v10, 16
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v12
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv16i16_nxv8i16:
@@ -140,8 +138,8 @@ define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32(<vscale x 4 x i32>
 ; V:       # %bb.0:
 ; V-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; V-NEXT:    vmv2r.v v12, v10
-; V-NEXT:    vmv2r.v v14, v8
-; V-NEXT:    vwaddu.vv v8, v14, v12
+; V-NEXT:    vmv2r.v v10, v8
+; V-NEXT:    vwaddu.vv v8, v10, v12
 ; V-NEXT:    li a0, -1
 ; V-NEXT:    vwmaccu.vx v8, a0, v12
 ; V-NEXT:    ret
@@ -149,11 +147,10 @@ define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32(<vscale x 4 x i32>
 ; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; ZVBB-NEXT:    vmv2r.v v12, v10
-; ZVBB-NEXT:    vmv2r.v v14, v8
+; ZVBB-NEXT:    vmv2r.v v12, v8
 ; ZVBB-NEXT:    li a0, 32
-; ZVBB-NEXT:    vwsll.vx v8, v12, a0
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v14
+; ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v12
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv8i32_nxv4i32:
@@ -225,13 +222,12 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv64i1(<vscale x 64 x i1
 ; V-NEXT:    vmerge.vim v24, v24, 1, v0
 ; V-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; V-NEXT:    vwaddu.vv v8, v24, v16
-; V-NEXT:    vwaddu.vv v0, v28, v20
+; V-NEXT:    vwaddu.vv v24, v28, v20
 ; V-NEXT:    vwmaccu.vx v8, a0, v16
-; V-NEXT:    vwmaccu.vx v0, a0, v20
+; V-NEXT:    vwmaccu.vx v24, a0, v20
 ; V-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; V-NEXT:    vmsne.vi v16, v8, 0
-; V-NEXT:    vmsne.vi v8, v0, 0
-; V-NEXT:    vmv1r.v v0, v16
+; V-NEXT:    vmsne.vi v0, v8, 0
+; V-NEXT:    vmsne.vi v8, v24, 0
 ; V-NEXT:    ret
 ;
 ; ZVBB-LABEL: vector_interleave_nxv128i1_nxv64i1:
@@ -243,13 +239,12 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv64i1(<vscale x 64 x i1
 ; ZVBB-NEXT:    vmerge.vim v24, v24, 1, v0
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; ZVBB-NEXT:    vwsll.vi v8, v24, 8
-; ZVBB-NEXT:    vwsll.vi v0, v28, 8
+; ZVBB-NEXT:    vwsll.vi v24, v28, 8
 ; ZVBB-NEXT:    vwaddu.wv v8, v8, v16
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v20
+; ZVBB-NEXT:    vwaddu.wv v24, v24, v20
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; ZVBB-NEXT:    vmsne.vi v16, v8, 0
-; ZVBB-NEXT:    vmsne.vi v8, v0, 0
-; ZVBB-NEXT:    vmv1r.v v0, v16
+; ZVBB-NEXT:    vmsne.vi v0, v8, 0
+; ZVBB-NEXT:    vmsne.vi v8, v24, 0
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv128i1_nxv64i1:
@@ -267,9 +262,8 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv64i1(<vscale x 64 x i1
 ; ZIP-NEXT:    ri.vzip2a.vv v0, v8, v16
 ; ZIP-NEXT:    ri.vzip2a.vv v24, v12, v20
 ; ZIP-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; ZIP-NEXT:    vmsne.vi v9, v0, 0
+; ZIP-NEXT:    vmsne.vi v0, v0, 0
 ; ZIP-NEXT:    vmsne.vi v8, v24, 0
-; ZIP-NEXT:    vmv1r.v v0, v9
 ; ZIP-NEXT:    ret
   %res = call <vscale x 128 x i1> @llvm.vector.interleave2.nxv128i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b)
   ret <vscale x 128 x i1> %res
@@ -282,21 +276,21 @@ define <vscale x 128 x i8> @vector_interleave_nxv128i8_nxv64i8(<vscale x 64 x i8
 ; V-NEXT:    vmv8r.v v24, v8
 ; V-NEXT:    vwaddu.vv v8, v24, v16
 ; V-NEXT:    li a0, -1
-; V-NEXT:    vwaddu.vv v0, v28, v20
+; V-NEXT:    vwaddu.vv v24, v28, v20
 ; V-NEXT:    vwmaccu.vx v8, a0, v16
-; V-NEXT:    vwmaccu.vx v0, a0, v20
-; V-NEXT:    vmv8r.v v16, v0
+; V-NEXT:    vwmaccu.vx v24, a0, v20
+; V-NEXT:    vmv8r.v v16, v24
 ; V-NEXT:    ret
 ;
 ; ZVBB-LABEL: vector_interleave_nxv128i8_nxv64i8:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZVBB-NEXT:    vwsll.vi v24, v16, 8
-; ZVBB-NEXT:    vwsll.vi v0, v20, 8
-; ZVBB-NEXT:    vwaddu.wv v24, v24, v8
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v12
-; ZVBB-NEXT:    vmv8r.v v8, v24
-; ZVBB-NEXT:    vmv8r.v v16, v0
+; ZVBB-NEXT:    vmv8r.v v24, v16
+; ZVBB-NEXT:    vwsll.vi v0, v24, 8
+; ZVBB-NEXT:    vwsll.vi v16, v28, 8
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v8
+; ZVBB-NEXT:    vwaddu.wv v16, v16, v12
+; ZVBB-NEXT:    vmv8r.v v8, v0
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv128i8_nxv64i8:
@@ -320,21 +314,21 @@ define <vscale x 64 x i16> @vector_interleave_nxv64i16_nxv32i16(<vscale x 32 x i
 ; V-NEXT:    vmv8r.v v24, v8
 ; V-NEXT:    vwaddu.vv v8, v24, v16
 ; V-NEXT:    li a0, -1
-; V-NEXT:    vwaddu.vv v0, v28, v20
+; V-NEXT:    vwaddu.vv v24, v28, v20
 ; V-NEXT:    vwmaccu.vx v8, a0, v16
-; V-NEXT:    vwmaccu.vx v0, a0, v20
-; V-NEXT:    vmv8r.v v16, v0
+; V-NEXT:    vwmaccu.vx v24, a0, v20
+; V-NEXT:    vmv8r.v v16, v24
 ; V-NEXT:    ret
 ;
 ; ZVBB-LABEL: vector_interleave_nxv64i16_nxv32i16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVBB-NEXT:    vwsll.vi v24, v16, 16
-; ZVBB-NEXT:    vwsll.vi v0, v20, 16
-; ZVBB-NEXT:    vwaddu.wv v24, v24, v8
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v12
-; ZVBB-NEXT:    vmv8r.v v8, v24
-; ZVBB-NEXT:    vmv8r.v v16, v0
+; ZVBB-NEXT:    vmv8r.v v24, v16
+; ZVBB-NEXT:    vwsll.vi v0, v24, 16
+; ZVBB-NEXT:    vwsll.vi v16, v28, 16
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v8
+; ZVBB-NEXT:    vwaddu.wv v16, v16, v12
+; ZVBB-NEXT:    vmv8r.v v8, v0
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv64i16_nxv32i16:
@@ -358,22 +352,22 @@ define <vscale x 32 x i32> @vector_interleave_nxv32i32_nxv16i32(<vscale x 16 x i
 ; V-NEXT:    vmv8r.v v24, v8
 ; V-NEXT:    vwaddu.vv v8, v24, v16
 ; V-NEXT:    li a0, -1
-; V-NEXT:    vwaddu.vv v0, v28, v20
+; V-NEXT:    vwaddu.vv v24, v28, v20
 ; V-NEXT:    vwmaccu.vx v8, a0, v16
-; V-NEXT:    vwmaccu.vx v0, a0, v20
-; V-NEXT:    vmv8r.v v16, v0
+; V-NEXT:    vwmaccu.vx v24, a0, v20
+; V-NEXT:    vmv8r.v v16, v24
 ; V-NEXT:    ret
 ;
 ; ZVBB-LABEL: vector_interleave_nxv32i32_nxv16i32:
 ; ZVBB:       # %bb.0:
+; ZVBB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; ZVBB-NEXT:    vmv8r.v v24, v16
 ; ZVBB-NEXT:    li a0, 32
-; ZVBB-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; ZVBB-NEXT:    vwsll.vx v24, v16, a0
-; ZVBB-NEXT:    vwsll.vx v0, v20, a0
-; ZVBB-NEXT:    vwaddu.wv v24, v24, v8
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v12
-; ZVBB-NEXT:    vmv8r.v v8, v24
-; ZVBB-NEXT:    vmv8r.v v16, v0
+; ZVBB-NEXT:    vwsll.vx v0, v24, a0
+; ZVBB-NEXT:    vwsll.vx v16, v28, a0
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v8
+; ZVBB-NEXT:    vwaddu.wv v16, v16, v12
+; ZVBB-NEXT:    vmv8r.v v8, v0
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv32i32_nxv16i32:
@@ -447,15 +441,15 @@ define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison(<vscale x 4
 ; CHECK-LABEL: vector_interleave_nxv8i32_nxv4i32_poison:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32_poison:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; ZVBB-NEXT:    vmv2r.v v12, v8
-; ZVBB-NEXT:    vzext.vf2 v8, v12
+; ZVBB-NEXT:    vmv2r.v v10, v8
+; ZVBB-NEXT:    vzext.vf2 v8, v10
 ; ZVBB-NEXT:    ret
   %res = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> poison)
   ret <vscale x 8 x i32> %res
@@ -473,9 +467,9 @@ define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison2(<vscale x 4
 ; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32_poison2:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; ZVBB-NEXT:    vmv2r.v v12, v8
+; ZVBB-NEXT:    vmv2r.v v10, v8
 ; ZVBB-NEXT:    li a0, 32
-; ZVBB-NEXT:    vwsll.vx v8, v12, a0
+; ZVBB-NEXT:    vwsll.vx v8, v10, a0
 ; ZVBB-NEXT:    ret
   %res = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a)
   ret <vscale x 8 x i32> %res
@@ -509,13 +503,13 @@ define <vscale x 48 x i1> @vector_interleave_nxv48i1_nxv16i1(<vscale x 16 x i1>
 ; CHECK-NEXT:    vl2r.v v10, (a3)
 ; CHECK-NEXT:    vl2r.v v12, (a0)
 ; CHECK-NEXT:    srli a1, a1, 2
-; CHECK-NEXT:    vmsne.vi v14, v8, 0
-; CHECK-NEXT:    vmsne.vi v8, v10, 0
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
+; CHECK-NEXT:    vmsne.vi v9, v10, 0
 ; CHECK-NEXT:    vmsne.vi v0, v12, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v8, a1
+; CHECK-NEXT:    vslideup.vx v0, v9, a1
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v14, a2
+; CHECK-NEXT:    vslideup.vx v0, v8, a2
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    li a1, 6
 ; CHECK-NEXT:    mul a0, a0, a1
@@ -550,13 +544,13 @@ define <vscale x 48 x i1> @vector_interleave_nxv48i1_nxv16i1(<vscale x 16 x i1>
 ; ZVBB-NEXT:    vl2r.v v10, (a3)
 ; ZVBB-NEXT:    vl2r.v v12, (a0)
 ; ZVBB-NEXT:    srli a1, a1, 2
-; ZVBB-NEXT:    vmsne.vi v14, v8, 0
-; ZVBB-NEXT:    vmsne.vi v8, v10, 0
+; ZVBB-NEXT:    vmsne.vi v8, v8, 0
+; ZVBB-NEXT:    vmsne.vi v9, v10, 0
 ; ZVBB-NEXT:    vmsne.vi v0, v12, 0
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; ZVBB-NEXT:    vslideup.vx v0, v8, a1
+; ZVBB-NEXT:    vslideup.vx v0, v9, a1
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; ZVBB-NEXT:    vslideup.vx v0, v14, a2
+; ZVBB-NEXT:    vslideup.vx v0, v8, a2
 ; ZVBB-NEXT:    csrr a0, vlenb
 ; ZVBB-NEXT:    li a1, 6
 ; ZVBB-NEXT:    mul a0, a0, a1
@@ -810,15 +804,15 @@ define <vscale x 64 x i1> @vector_interleave_nxv64i1_nxv16i1(<vscale x 16 x i1>
 ; CHECK-NEXT:    vl2r.v v10, (a4)
 ; CHECK-NEXT:    vl2r.v v12, (a3)
 ; CHECK-NEXT:    vl2r.v v14, (a0)
-; CHECK-NEXT:    vmsne.vi v16, v8, 0
-; CHECK-NEXT:    vmsne.vi v8, v10, 0
-; CHECK-NEXT:    vmsne.vi v9, v12, 0
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
+; CHECK-NEXT:    vmsne.vi v9, v10, 0
+; CHECK-NEXT:    vmsne.vi v10, v12, 0
 ; CHECK-NEXT:    vmsne.vi v0, v14, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v8, v16, a1
-; CHECK-NEXT:    vslideup.vx v0, v9, a1
+; CHECK-NEXT:    vslideup.vx v9, v8, a1
+; CHECK-NEXT:    vslideup.vx v0, v10, a1
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v8, a2
+; CHECK-NEXT:    vslideup.vx v0, v9, a2
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
@@ -855,15 +849,15 @@ define <vscale x 64 x i1> @vector_interleave_nxv64i1_nxv16i1(<vscale x 16 x i1>
 ; ZVBB-NEXT:    vl2r.v v10, (a4)
 ; ZVBB-NEXT:    vl2r.v v12, (a3)
 ; ZVBB-NEXT:    vl2r.v v14, (a0)
-; ZVBB-NEXT:    vmsne.vi v16, v8, 0
-; ZVBB-NEXT:    vmsne.vi v8, v10, 0
-; ZVBB-NEXT:    vmsne.vi v9, v12, 0
+; ZVBB-NEXT:    vmsne.vi v8, v8, 0
+; ZVBB-NEXT:    vmsne.vi v9, v10, 0
+; ZVBB-NEXT:    vmsne.vi v10, v12, 0
 ; ZVBB-NEXT:    vmsne.vi v0, v14, 0
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; ZVBB-NEXT:    vslideup.vx v8, v16, a1
-; ZVBB-NEXT:    vslideup.vx v0, v9, a1
+; ZVBB-NEXT:    vslideup.vx v9, v8, a1
+; ZVBB-NEXT:    vslideup.vx v0, v10, a1
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; ZVBB-NEXT:    vslideup.vx v0, v8, a2
+; ZVBB-NEXT:    vslideup.vx v0, v9, a2
 ; ZVBB-NEXT:    csrr a0, vlenb
 ; ZVBB-NEXT:    slli a0, a0, 3
 ; ZVBB-NEXT:    add sp, sp, a0
@@ -1139,15 +1133,15 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
 ; CHECK-NEXT:    vl1r.v v13, (a0)
 ; CHECK-NEXT:    vl1r.v v17, (a5)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmsne.vi v18, v8, 0
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
 ; CHECK-NEXT:    vmsne.vi v0, v10, 0
-; CHECK-NEXT:    vmsne.vi v8, v14, 0
-; CHECK-NEXT:    vmsne.vi v9, v12, 0
+; CHECK-NEXT:    vmsne.vi v9, v14, 0
+; CHECK-NEXT:    vmsne.vi v10, v12, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v18, a1
-; CHECK-NEXT:    vslideup.vx v9, v8, a1
+; CHECK-NEXT:    vslideup.vx v0, v8, a1
+; CHECK-NEXT:    vslideup.vx v10, v9, a1
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v9, a3
+; CHECK-NEXT:    vslideup.vx v0, v10, a3
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vmsne.vi v8, v16, 0
 ; CHECK-NEXT:    csrr a0, vlenb
@@ -1213,15 +1207,15 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
 ; ZVBB-NEXT:    vl1r.v v13, (a0)
 ; ZVBB-NEXT:    vl1r.v v17, (a5)
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; ZVBB-NEXT:    vmsne.vi v18, v8, 0
+; ZVBB-NEXT:    vmsne.vi v8, v8, 0
 ; ZVBB-NEXT:    vmsne.vi v0, v10, 0
-; ZVBB-NEXT:    vmsne.vi v8, v14, 0
-; ZVBB-NEXT:    vmsne.vi v9, v12, 0
+; ZVBB-NEXT:    vmsne.vi v9, v14, 0
+; ZVBB-NEXT:    vmsne.vi v10, v12, 0
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; ZVBB-NEXT:    vslideup.vx v0, v18, a1
-; ZVBB-NEXT:    vslideup.vx v9, v8, a1
+; ZVBB-NEXT:    vslideup.vx v0, v8, a1
+; ZVBB-NEXT:    vslideup.vx v10, v9, a1
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; ZVBB-NEXT:    vslideup.vx v0, v9, a3
+; ZVBB-NEXT:    vslideup.vx v0, v10, a3
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; ZVBB-NEXT:    vmsne.vi v8, v16, 0
 ; ZVBB-NEXT:    csrr a0, vlenb
@@ -2369,17 +2363,17 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1>
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vmsne.vi v20, v8, 0
 ; CHECK-NEXT:    vmsne.vi v9, v16, 0
-; CHECK-NEXT:    vmsne.vi v16, v10, 0
+; CHECK-NEXT:    vmsne.vi v10, v10, 0
 ; CHECK-NEXT:    vmsne.vi v0, v12, 0
-; CHECK-NEXT:    vmsne.vi v10, v18, 0
+; CHECK-NEXT:    vmsne.vi v11, v18, 0
 ; CHECK-NEXT:    vmsne.vi v8, v14, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vslideup.vx v9, v20, a1
-; CHECK-NEXT:    vslideup.vx v0, v16, a1
+; CHECK-NEXT:    vslideup.vx v0, v10, a1
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vslideup.vx v0, v9, a3
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v8, v10, a1
+; CHECK-NEXT:    vslideup.vx v8, v11, a1
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    li a1, 12
 ; CHECK-NEXT:    mul a0, a0, a1
@@ -2454,17 +2448,17 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1>
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; ZVBB-NEXT:    vmsne.vi v20, v8, 0
 ; ZVBB-NEXT:    vmsne.vi v9, v16, 0
-; ZVBB-NEXT:    vmsne.vi v16, v10, 0
+; ZVBB-NEXT:    vmsne.vi v10, v10, 0
 ; ZVBB-NEXT:    vmsne.vi v0, v12, 0
-; ZVBB-NEXT:    vmsne.vi v10, v18, 0
+; ZVBB-NEXT:    vmsne.vi v11, v18, 0
 ; ZVBB-NEXT:    vmsne.vi v8, v14, 0
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; ZVBB-NEXT:    vslideup.vx v9, v20, a1
-; ZVBB-NEXT:    vslideup.vx v0, v16, a1
+; ZVBB-NEXT:    vslideup.vx v0, v10, a1
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; ZVBB-NEXT:    vslideup.vx v0, v9, a3
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; ZVBB-NEXT:    vslideup.vx v8, v10, a1
+; ZVBB-NEXT:    vslideup.vx v8, v11, a1
 ; ZVBB-NEXT:    csrr a0, vlenb
 ; ZVBB-NEXT:    li a1, 12
 ; ZVBB-NEXT:    mul a0, a0, a1
@@ -5568,30 +5562,30 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv16i1(<vscale x 16 x i1
 ; CHECK-NEXT:    vl1r.v v9, (a7)
 ; CHECK-NEXT:    vl1r.v v15, (a3)
 ; CHECK-NEXT:    vsetvli a3, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmsne.vi v20, v16, 0
-; CHECK-NEXT:    vmsne.vi v16, v12, 0
+; CHECK-NEXT:    vmsne.vi v16, v16, 0
+; CHECK-NEXT:    vmsne.vi v17, v12, 0
 ; CHECK-NEXT:    vl1r.v v12, (a6)
-; CHECK-NEXT:    vmsne.vi v17, v8, 0
+; CHECK-NEXT:    vmsne.vi v20, v8, 0
 ; CHECK-NEXT:    vmsne.vi v0, v14, 0
-; CHECK-NEXT:    vl1r.v v14, (a1)
+; CHECK-NEXT:    vl1r.v v8, (a1)
 ; CHECK-NEXT:    vl1r.v v19, (a5)
 ; CHECK-NEXT:    vl1r.v v11, (a2)
 ; CHECK-NEXT:    vl1r.v v13, (t0)
-; CHECK-NEXT:    vl1r.v v15, (a4)
-; CHECK-NEXT:    vmsne.vi v9, v18, 0
-; CHECK-NEXT:    vmsne.vi v18, v10, 0
-; CHECK-NEXT:    vmsne.vi v10, v12, 0
-; CHECK-NEXT:    vmsne.vi v8, v14, 0
+; CHECK-NEXT:    vl1r.v v9, (a4)
+; CHECK-NEXT:    vmsne.vi v14, v18, 0
+; CHECK-NEXT:    vmsne.vi v10, v10, 0
+; CHECK-NEXT:    vmsne.vi v11, v12, 0
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
 ; CHECK-NEXT:    srli a1, a0, 2
 ; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v16, v20, a1
-; CHECK-NEXT:    vslideup.vx v0, v17, a1
-; CHECK-NEXT:    vslideup.vx v18, v9, a1
-; CHECK-NEXT:    vslideup.vx v8, v10, a1
+; CHECK-NEXT:    vslideup.vx v17, v16, a1
+; CHECK-NEXT:    vslideup.vx v0, v20, a1
+; CHECK-NEXT:    vslideup.vx v10, v14, a1
+; CHECK-NEXT:    vslideup.vx v8, v11, a1
 ; CHECK-NEXT:    srli a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v16, a0
-; CHECK-NEXT:    vslideup.vx v8, v18, a0
+; CHECK-NEXT:    vslideup.vx v0, v17, a0
+; CHECK-NEXT:    vslideup.vx v8, v10, a0
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
@@ -5667,30 +5661,30 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv16i1(<vscale x 16 x i1
 ; ZVBB-NEXT:    vl1r.v v9, (a7)
 ; ZVBB-NEXT:    vl1r.v v15, (a3)
 ; ZVBB-NEXT:    vsetvli a3, zero, e8, m2, ta, ma
-; ZVBB-NEXT:    vmsne.vi v20, v16, 0
-; ZVBB-NEXT:    vmsne.vi v16, v12, 0
+; ZVBB-NEXT:    vmsne.vi v16, v16, 0
+; ZVBB-NEXT:    vmsne.vi v17, v12, 0
 ; ZVBB-NEXT:    vl1r.v v12, (a6)
-; ZVBB-NEXT:    vmsne.vi v17, v8, 0
+; ZVBB-NEXT:    vmsne.vi v20, v8, 0
 ; ZVBB-NEXT:    vmsne.vi v0, v14, 0
-; ZVBB-NEXT:    vl1r.v v14, (a1)
+; ZVBB-NEXT:    vl1r.v v8, (a1)
 ; ZVBB-NEXT:    vl1r.v v19, (a5)
 ; ZVBB-NEXT:    vl1r.v v11, (a2)
 ; ZVBB-NEXT:    vl1r.v v13, (t0)
-; ZVBB-NEXT:    vl1r.v v15, (a4)
-; ZVBB-NEXT:    vmsne.vi v9, v18, 0
-; ZVBB-NEXT:    vmsne.vi v18, v10, 0
-; ZVBB-NEXT:    vmsne.vi v10, v12, 0
-; ZVBB-NEXT:    vmsne.vi v8, v14, 0
+; ZVBB-NEXT:    vl1r.v v9, (a4)
+; ZVBB-NEXT:    vmsne.vi v14, v18, 0
+; ZVBB-NEXT:    vmsne.vi v10, v10, 0
+; ZVBB-NEXT:    vmsne.vi v11, v12, 0
+; ZVBB-NEXT:    vmsne.vi v8, v8, 0
 ; ZVBB-NEXT:    srli a1, a0, 2
 ; ZVBB-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
-; ZVBB-NEXT:    vslideup.vx v16, v20, a1
-; ZVBB-NEXT:    vslideup.vx v0, v17, a1
-; ZVBB-NEXT:    vslideup.vx v18, v9, a1
-; ZVBB-NEXT:    vslideup.vx v8, v10, a1
+; ZVBB-NEXT:    vslideup.vx v17, v16, a1
+; ZVBB-NEXT:    vslideup.vx v0, v20, a1
+; ZVBB-NEXT:    vslideup.vx v10, v14, a1
+; ZVBB-NEXT:    vslideup.vx v8, v11, a1
 ; ZVBB-NEXT:    srli a0, a0, 1
 ; ZVBB-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; ZVBB-NEXT:    vslideup.vx v0, v16, a0
-; ZVBB-NEXT:    vslideup.vx v8, v18, a0
+; ZVBB-NEXT:    vslideup.vx v0, v17, a0
+; ZVBB-NEXT:    vslideup.vx v8, v10, a0
 ; ZVBB-NEXT:    csrr a0, vlenb
 ; ZVBB-NEXT:    slli a0, a0, 4
 ; ZVBB-NEXT:    add sp, sp, a0
@@ -6309,8 +6303,8 @@ define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv4bf16(<vscale x 4 x
 ; V:       # %bb.0:
 ; V-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; V-NEXT:    vmv1r.v v10, v9
-; V-NEXT:    vmv1r.v v11, v8
-; V-NEXT:    vwaddu.vv v8, v11, v10
+; V-NEXT:    vmv1r.v v9, v8
+; V-NEXT:    vwaddu.vv v8, v9, v10
 ; V-NEXT:    li a0, -1
 ; V-NEXT:    vwmaccu.vx v8, a0, v10
 ; V-NEXT:    ret
@@ -6318,10 +6312,9 @@ define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv4bf16(<vscale x 4 x
 ; ZVBB-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; ZVBB-NEXT:    vmv1r.v v10, v9
-; ZVBB-NEXT:    vmv1r.v v11, v8
-; ZVBB-NEXT:    vwsll.vi v8, v10, 16
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v11
+; ZVBB-NEXT:    vmv1r.v v10, v8
+; ZVBB-NEXT:    vwsll.vi v8, v9, 16
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v10
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
@@ -6384,8 +6377,8 @@ define <vscale x 8 x half> @vector_interleave_nxv8f16_nxv4f16(<vscale x 4 x half
 ; V:       # %bb.0:
 ; V-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; V-NEXT:    vmv1r.v v10, v9
-; V-NEXT:    vmv1r.v v11, v8
-; V-NEXT:    vwaddu.vv v8, v11, v10
+; V-NEXT:    vmv1r.v v9, v8
+; V-NEXT:    vwaddu.vv v8, v9, v10
 ; V-NEXT:    li a0, -1
 ; V-NEXT:    vwmaccu.vx v8, a0, v10
 ; V-NEXT:    ret
@@ -6393,10 +6386,9 @@ define <vscale x 8 x half> @vector_interleave_nxv8f16_nxv4f16(<vscale x 4 x half
 ; ZVBB-LABEL: vector_interleave_nxv8f16_nxv4f16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; ZVBB-NEXT:    vmv1r.v v10, v9
-; ZVBB-NEXT:    vmv1r.v v11, v8
-; ZVBB-NEXT:    vwsll.vi v8, v10, 16
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v11
+; ZVBB-NEXT:    vmv1r.v v10, v8
+; ZVBB-NEXT:    vwsll.vi v8, v9, 16
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v10
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv8f16_nxv4f16:
@@ -6416,8 +6408,8 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv2f32(<vscale x 2 x flo
 ; V:       # %bb.0:
 ; V-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; V-NEXT:    vmv1r.v v10, v9
-; V-NEXT:    vmv1r.v v11, v8
-; V-NEXT:    vwaddu.vv v8, v11, v10
+; V-NEXT:    vmv1r.v v9, v8
+; V-NEXT:    vwaddu.vv v8, v9, v10
 ; V-NEXT:    li a0, -1
 ; V-NEXT:    vwmaccu.vx v8, a0, v10
 ; V-NEXT:    ret
@@ -6425,11 +6417,10 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv2f32(<vscale x 2 x flo
 ; ZVBB-LABEL: vector_interleave_nxv4f32_nxv2f32:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; ZVBB-NEXT:    vmv1r.v v10, v9
-; ZVBB-NEXT:    vmv1r.v v11, v8
+; ZVBB-NEXT:    vmv1r.v v10, v8
 ; ZVBB-NEXT:    li a0, 32
-; ZVBB-NEXT:    vwsll.vx v8, v10, a0
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v11
+; ZVBB-NEXT:    vwsll.vx v8, v9, a0
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v10
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv4f32_nxv2f32:
@@ -6449,8 +6440,8 @@ define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv8bf16(<vscale x 8
 ; V:       # %bb.0:
 ; V-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; V-NEXT:    vmv2r.v v12, v10
-; V-NEXT:    vmv2r.v v14, v8
-; V-NEXT:    vwaddu.vv v8, v14, v12
+; V-NEXT:    vmv2r.v v10, v8
+; V-NEXT:    vwaddu.vv v8, v10, v12
 ; V-NEXT:    li a0, -1
 ; V-NEXT:    vwmaccu.vx v8, a0, v12
 ; V-NEXT:    ret
@@ -6458,10 +6449,9 @@ define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv8bf16(<vscale x 8
 ; ZVBB-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVBB-NEXT:    vmv2r.v v12, v10
-; ZVBB-NEXT:    vmv2r.v v14, v8
-; ZVBB-NEXT:    vwsll.vi v8, v12, 16
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v14
+; ZVBB-NEXT:    vmv2r.v v12, v8
+; ZVBB-NEXT:    vwsll.vi v8, v10, 16
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v12
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
@@ -6481,8 +6471,8 @@ define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv8f16(<vscale x 8 x ha
 ; V:       # %bb.0:
 ; V-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; V-NEXT:    vmv2r.v v12, v10
-; V-NEXT:    vmv2r.v v14, v8
-; V-NEXT:    vwaddu.vv v8, v14, v12
+; V-NEXT:    vmv2r.v v10, v8
+; V-NEXT:    vwaddu.vv v8, v10, v12
 ; V-NEXT:    li a0, -1
 ; V-NEXT:    vwmaccu.vx v8, a0, v12
 ; V-NEXT:    ret
@@ -6490,10 +6480,9 @@ define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv8f16(<vscale x 8 x ha
 ; ZVBB-LABEL: vector_interleave_nxv16f16_nxv8f16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVBB-NEXT:    vmv2r.v v12, v10
-; ZVBB-NEXT:    vmv2r.v v14, v8
-; ZVBB-NEXT:    vwsll.vi v8, v12, 16
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v14
+; ZVBB-NEXT:    vmv2r.v v12, v8
+; ZVBB-NEXT:    vwsll.vi v8, v10, 16
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v12
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv16f16_nxv8f16:
@@ -6513,8 +6502,8 @@ define <vscale x 8 x float> @vector_interleave_nxv8f32_nxv4f32(<vscale x 4 x flo
 ; V:       # %bb.0:
 ; V-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; V-NEXT:    vmv2r.v v12, v10
-; V-NEXT:    vmv2r.v v14, v8
-; V-NEXT:    vwaddu.vv v8, v14, v12
+; V-NEXT:    vmv2r.v v10, v8
+; V-NEXT:    vwaddu.vv v8, v10, v12
 ; V-NEXT:    li a0, -1
 ; V-NEXT:    vwmaccu.vx v8, a0, v12
 ; V-NEXT:    ret
@@ -6522,11 +6511,10 @@ define <vscale x 8 x float> @vector_interleave_nxv8f32_nxv4f32(<vscale x 4 x flo
 ; ZVBB-LABEL: vector_interleave_nxv8f32_nxv4f32:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; ZVBB-NEXT:    vmv2r.v v12, v10
-; ZVBB-NEXT:    vmv2r.v v14, v8
+; ZVBB-NEXT:    vmv2r.v v12, v8
 ; ZVBB-NEXT:    li a0, 32
-; ZVBB-NEXT:    vwsll.vx v8, v12, a0
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v14
+; ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v12
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv8f32_nxv4f32:
@@ -6593,21 +6581,21 @@ define <vscale x 64 x bfloat> @vector_interleave_nxv64bf16_nxv32bf16(<vscale x 3
 ; V-NEXT:    vmv8r.v v24, v8
 ; V-NEXT:    vwaddu.vv v8, v24, v16
 ; V-NEXT:    li a0, -1
-; V-NEXT:    vwaddu.vv v0, v28, v20
+; V-NEXT:    vwaddu.vv v24, v28, v20
 ; V-NEXT:    vwmaccu.vx v8, a0, v16
-; V-NEXT:    vwmaccu.vx v0, a0, v20
-; V-NEXT:    vmv8r.v v16, v0
+; V-NEXT:    vwmaccu.vx v24, a0, v20
+; V-NEXT:    vmv8r.v v16, v24
 ; V-NEXT:    ret
 ;
 ; ZVBB-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVBB-NEXT:    vwsll.vi v24, v16, 16
-; ZVBB-NEXT:    vwsll.vi v0, v20, 16
-; ZVBB-NEXT:    vwaddu.wv v24, v24, v8
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v12
-; ZVBB-NEXT:    vmv8r.v v8, v24
-; ZVBB-NEXT:    vmv8r.v v16, v0
+; ZVBB-NEXT:    vmv8r.v v24, v16
+; ZVBB-NEXT:    vwsll.vi v0, v24, 16
+; ZVBB-NEXT:    vwsll.vi v16, v28, 16
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v8
+; ZVBB-NEXT:    vwaddu.wv v16, v16, v12
+; ZVBB-NEXT:    vmv8r.v v8, v0
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
@@ -6631,21 +6619,21 @@ define <vscale x 64 x half> @vector_interleave_nxv64f16_nxv32f16(<vscale x 32 x
 ; V-NEXT:    vmv8r.v v24, v8
 ; V-NEXT:    vwaddu.vv v8, v24, v16
 ; V-NEXT:    li a0, -1
-; V-NEXT:    vwaddu.vv v0, v28, v20
+; V-NEXT:    vwaddu.vv v24, v28, v20
 ; V-NEXT:    vwmaccu.vx v8, a0, v16
-; V-NEXT:    vwmaccu.vx v0, a0, v20
-; V-NEXT:    vmv8r.v v16, v0
+; V-NEXT:    vwmaccu.vx v24, a0, v20
+; V-NEXT:    vmv8r.v v16, v24
 ; V-NEXT:    ret
 ;
 ; ZVBB-LABEL: vector_interleave_nxv64f16_nxv32f16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVBB-NEXT:    vwsll.vi v24, v16, 16
-; ZVBB-NEXT:    vwsll.vi v0, v20, 16
-; ZVBB-NEXT:    vwaddu.wv v24, v24, v8
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v12
-; ZVBB-NEXT:    vmv8r.v v8, v24
-; ZVBB-NEXT:    vmv8r.v v16, v0
+; ZVBB-NEXT:    vmv8r.v v24, v16
+; ZVBB-NEXT:    vwsll.vi v0, v24, 16
+; ZVBB-NEXT:    vwsll.vi v16, v28, 16
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v8
+; ZVBB-NEXT:    vwaddu.wv v16, v16, v12
+; ZVBB-NEXT:    vmv8r.v v8, v0
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv64f16_nxv32f16:
@@ -6669,22 +6657,22 @@ define <vscale x 32 x float> @vector_interleave_nxv32f32_nxv16f32(<vscale x 16 x
 ; V-NEXT:    vmv8r.v v24, v8
 ; V-NEXT:    vwaddu.vv v8, v24, v16
 ; V-NEXT:    li a0, -1
-; V-NEXT:    vwaddu.vv v0, v28, v20
+; V-NEXT:    vwaddu.vv v24, v28, v20
 ; V-NEXT:    vwmaccu.vx v8, a0, v16
-; V-NEXT:    vwmaccu.vx v0, a0, v20
-; V-NEXT:    vmv8r.v v16, v0
+; V-NEXT:    vwmaccu.vx v24, a0, v20
+; V-NEXT:    vmv8r.v v16, v24
 ; V-NEXT:    ret
 ;
 ; ZVBB-LABEL: vector_interleave_nxv32f32_nxv16f32:
 ; ZVBB:       # %bb.0:
+; ZVBB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; ZVBB-NEXT:    vmv8r.v v24, v16
 ; ZVBB-NEXT:    li a0, 32
-; ZVBB-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; ZVBB-NEXT:    vwsll.vx v24, v16, a0
-; ZVBB-NEXT:    vwsll.vx v0, v20, a0
-; ZVBB-NEXT:    vwaddu.wv v24, v24, v8
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v12
-; ZVBB-NEXT:    vmv8r.v v8, v24
-; ZVBB-NEXT:    vmv8r.v v16, v0
+; ZVBB-NEXT:    vwsll.vx v0, v24, a0
+; ZVBB-NEXT:    vwsll.vx v16, v28, a0
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v8
+; ZVBB-NEXT:    vwaddu.wv v16, v16, v12
+; ZVBB-NEXT:    vmv8r.v v8, v0
 ; ZVBB-NEXT:    ret
 ;
 ; ZIP-LABEL: vector_interleave_nxv32f32_nxv16f32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll
index d27f749880ab4..3b3d802facc7a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll
@@ -204,8 +204,8 @@ define <vscale x 8 x i16> @vsext_nxv8i8_nxv8i16(<vscale x 8 x i8> %va) {
 ; CHECK-LABEL: vsext_nxv8i8_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 8 x i8> %va to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %evec
@@ -215,8 +215,8 @@ define <vscale x 8 x i16> @vzext_nxv8i8_nxv8i16(<vscale x 8 x i8> %va) {
 ; CHECK-LABEL: vzext_nxv8i8_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 8 x i8> %va to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %evec
@@ -226,8 +226,8 @@ define <vscale x 8 x i32> @vsext_nxv8i8_nxv8i32(<vscale x 8 x i8> %va) {
 ; CHECK-LABEL: vsext_nxv8i8_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vsext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 8 x i8> %va to <vscale x 8 x i32>
   ret <vscale x 8 x i32> %evec
@@ -237,8 +237,8 @@ define <vscale x 8 x i32> @vzext_nxv8i8_nxv8i32(<vscale x 8 x i8> %va) {
 ; CHECK-LABEL: vzext_nxv8i8_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 8 x i8> %va to <vscale x 8 x i32>
   ret <vscale x 8 x i32> %evec
@@ -248,8 +248,8 @@ define <vscale x 8 x i64> @vsext_nxv8i8_nxv8i64(<vscale x 8 x i8> %va) {
 ; CHECK-LABEL: vsext_nxv8i8_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v8
-; CHECK-NEXT:    vsext.vf8 v8, v16
+; CHECK-NEXT:    vmv1r.v v15, v8
+; CHECK-NEXT:    vsext.vf8 v8, v15
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   ret <vscale x 8 x i64> %evec
@@ -259,8 +259,8 @@ define <vscale x 8 x i64> @vzext_nxv8i8_nxv8i64(<vscale x 8 x i8> %va) {
 ; CHECK-LABEL: vzext_nxv8i8_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v8
-; CHECK-NEXT:    vzext.vf8 v8, v16
+; CHECK-NEXT:    vmv1r.v v15, v8
+; CHECK-NEXT:    vzext.vf8 v8, v15
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   ret <vscale x 8 x i64> %evec
@@ -270,8 +270,8 @@ define <vscale x 16 x i16> @vsext_nxv16i8_nxv16i16(<vscale x 16 x i8> %va) {
 ; CHECK-LABEL: vsext_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 16 x i8> %va to <vscale x 16 x i16>
   ret <vscale x 16 x i16> %evec
@@ -281,8 +281,8 @@ define <vscale x 16 x i16> @vzext_nxv16i8_nxv16i16(<vscale x 16 x i8> %va) {
 ; CHECK-LABEL: vzext_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 16 x i8> %va to <vscale x 16 x i16>
   ret <vscale x 16 x i16> %evec
@@ -292,8 +292,8 @@ define <vscale x 16 x i32> @vsext_nxv16i8_nxv16i32(<vscale x 16 x i8> %va) {
 ; CHECK-LABEL: vsext_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vsext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 16 x i8> %va to <vscale x 16 x i32>
   ret <vscale x 16 x i32> %evec
@@ -303,8 +303,8 @@ define <vscale x 16 x i32> @vzext_nxv16i8_nxv16i32(<vscale x 16 x i8> %va) {
 ; CHECK-LABEL: vzext_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vzext.vf4 v8, v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vzext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 16 x i8> %va to <vscale x 16 x i32>
   ret <vscale x 16 x i32> %evec
@@ -314,8 +314,8 @@ define <vscale x 32 x i16> @vsext_nxv32i8_nxv32i16(<vscale x 32 x i8> %va) {
 ; CHECK-LABEL: vsext_nxv32i8_nxv32i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vsext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 32 x i8> %va to <vscale x 32 x i16>
   ret <vscale x 32 x i16> %evec
@@ -325,8 +325,8 @@ define <vscale x 32 x i16> @vzext_nxv32i8_nxv32i16(<vscale x 32 x i8> %va) {
 ; CHECK-LABEL: vzext_nxv32i8_nxv32i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 32 x i8> %va to <vscale x 32 x i16>
   ret <vscale x 32 x i16> %evec
@@ -424,8 +424,8 @@ define <vscale x 4 x i32> @vsext_nxv4i16_nxv4i32(<vscale x 4 x i16> %va) {
 ; CHECK-LABEL: vsext_nxv4i16_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 4 x i16> %va to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %evec
@@ -435,8 +435,8 @@ define <vscale x 4 x i32> @vzext_nxv4i16_nxv4i32(<vscale x 4 x i16> %va) {
 ; CHECK-LABEL: vzext_nxv4i16_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 4 x i16> %va to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %evec
@@ -446,8 +446,8 @@ define <vscale x 4 x i64> @vsext_nxv4i16_nxv4i64(<vscale x 4 x i16> %va) {
 ; CHECK-LABEL: vsext_nxv4i16_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vsext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %evec
@@ -457,8 +457,8 @@ define <vscale x 4 x i64> @vzext_nxv4i16_nxv4i64(<vscale x 4 x i16> %va) {
 ; CHECK-LABEL: vzext_nxv4i16_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %evec
@@ -468,8 +468,8 @@ define <vscale x 8 x i32> @vsext_nxv8i16_nxv8i32(<vscale x 8 x i16> %va) {
 ; CHECK-LABEL: vsext_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 8 x i16> %va to <vscale x 8 x i32>
   ret <vscale x 8 x i32> %evec
@@ -479,8 +479,8 @@ define <vscale x 8 x i32> @vzext_nxv8i16_nxv8i32(<vscale x 8 x i16> %va) {
 ; CHECK-LABEL: vzext_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 8 x i16> %va to <vscale x 8 x i32>
   ret <vscale x 8 x i32> %evec
@@ -490,8 +490,8 @@ define <vscale x 8 x i64> @vsext_nxv8i16_nxv8i64(<vscale x 8 x i16> %va) {
 ; CHECK-LABEL: vsext_nxv8i16_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vsext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   ret <vscale x 8 x i64> %evec
@@ -501,8 +501,8 @@ define <vscale x 8 x i64> @vzext_nxv8i16_nxv8i64(<vscale x 8 x i16> %va) {
 ; CHECK-LABEL: vzext_nxv8i16_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vzext.vf4 v8, v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vzext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   ret <vscale x 8 x i64> %evec
@@ -512,8 +512,8 @@ define <vscale x 16 x i32> @vsext_nxv16i16_nxv16i32(<vscale x 16 x i16> %va) {
 ; CHECK-LABEL: vsext_nxv16i16_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vsext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 16 x i16> %va to <vscale x 16 x i32>
   ret <vscale x 16 x i32> %evec
@@ -523,8 +523,8 @@ define <vscale x 16 x i32> @vzext_nxv16i16_nxv16i32(<vscale x 16 x i16> %va) {
 ; CHECK-LABEL: vzext_nxv16i16_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 16 x i16> %va to <vscale x 16 x i32>
   ret <vscale x 16 x i32> %evec
@@ -556,8 +556,8 @@ define <vscale x 2 x i64> @vsext_nxv2i32_nxv2i64(<vscale x 2 x i32> %va) {
 ; CHECK-LABEL: vsext_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %evec
@@ -567,8 +567,8 @@ define <vscale x 2 x i64> @vzext_nxv2i32_nxv2i64(<vscale x 2 x i32> %va) {
 ; CHECK-LABEL: vzext_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %evec
@@ -578,8 +578,8 @@ define <vscale x 4 x i64> @vsext_nxv4i32_nxv4i64(<vscale x 4 x i32> %va) {
 ; CHECK-LABEL: vsext_nxv4i32_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %evec
@@ -589,8 +589,8 @@ define <vscale x 4 x i64> @vzext_nxv4i32_nxv4i64(<vscale x 4 x i32> %va) {
 ; CHECK-LABEL: vzext_nxv4i32_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %evec
@@ -600,8 +600,8 @@ define <vscale x 8 x i64> @vsext_nxv8i32_nxv8i64(<vscale x 8 x i32> %va) {
 ; CHECK-LABEL: vsext_nxv8i32_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vsext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %evec = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
   ret <vscale x 8 x i64> %evec
@@ -611,8 +611,8 @@ define <vscale x 8 x i64> @vzext_nxv8i32_nxv8i64(<vscale x 8 x i32> %va) {
 ; CHECK-LABEL: vzext_nxv8i32_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %evec = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
   ret <vscale x 8 x i64> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
index 03ef641364335..fe0c07cfda01a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
@@ -19,9 +19,9 @@ define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    vfadd.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -37,9 +37,9 @@ define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v10, v8
+; CHECK-NEXT:    vfadd.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -54,9 +54,9 @@ define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    vfadd.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -72,9 +72,9 @@ define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v10, v8
+; CHECK-NEXT:    vfadd.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -89,9 +89,9 @@ define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfadd.vv v10, v12, v10
+; CHECK-NEXT:    vfadd.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -103,13 +103,13 @@ define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfadd.vv v10, v10, v8
+; CHECK-NEXT:    vfadd.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -124,9 +124,9 @@ define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfadd.vv v12, v16, v12
+; CHECK-NEXT:    vfadd.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -138,13 +138,13 @@ define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfadd.vv v12, v12, v8
+; CHECK-NEXT:    vfadd.vv v8, v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -159,9 +159,9 @@ define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vv v16, v24, v16
+; CHECK-NEXT:    vfadd.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -173,13 +173,13 @@ define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v24, a0
+; CHECK-NEXT:    vmv.v.x v12, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vv v16, v16, v8
+; CHECK-NEXT:    vfadd.vv v8, v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -205,9 +205,9 @@ define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vv v0, v0, v8
+; CHECK-NEXT:    vfadd.vv v8, v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfadd.vv v16, v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -247,9 +247,9 @@ define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vv v0, v8, v0
+; CHECK-NEXT:    vfadd.vv v8, v8, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfadd.vv v16, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -281,9 +281,9 @@ define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 1 x half> @llvm.experimental.constrained.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -305,9 +305,9 @@ define <vscale x 1 x half> @vfadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b) s
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -329,9 +329,9 @@ define <vscale x 2 x half> @vfadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 2 x half> @llvm.experimental.constrained.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -353,9 +353,9 @@ define <vscale x 2 x half> @vfadd_vf_nxv2f16(<vscale x 2 x half> %va, half %b) s
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -377,9 +377,9 @@ define <vscale x 4 x half> @vfadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 4 x half> @llvm.experimental.constrained.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -397,13 +397,13 @@ define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b) s
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -425,9 +425,9 @@ define <vscale x 8 x half> @vfadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 8 x half> @llvm.experimental.constrained.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -445,13 +445,13 @@ define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b) s
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vmv.v.x v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -473,9 +473,9 @@ define <vscale x 16 x half> @vfadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 16 x half> @llvm.experimental.constrained.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -493,13 +493,13 @@ define <vscale x 16 x half> @vfadd_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a0
+; ZVFHMIN-NEXT:    vmv.v.x v12, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -532,9 +532,9 @@ define <vscale x 32 x half> @vfadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v16, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -580,9 +580,9 @@ define <vscale x 32 x half> @vfadd_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v16, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll
index 061b2b0c5ab37..6948dd28f3dce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll
@@ -19,9 +19,9 @@ define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    vfadd.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fadd <vscale x 1 x bfloat> %va, %vb
   ret <vscale x 1 x bfloat> %vc
@@ -34,9 +34,9 @@ define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfadd.vf v9, v9, fa5
+; CHECK-NEXT:    vfadd.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -51,9 +51,9 @@ define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    vfadd.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fadd <vscale x 2 x bfloat> %va, %vb
   ret <vscale x 2 x bfloat> %vc
@@ -66,9 +66,9 @@ define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfadd.vf v9, v9, fa5
+; CHECK-NEXT:    vfadd.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -83,9 +83,9 @@ define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfadd.vv v10, v12, v10
+; CHECK-NEXT:    vfadd.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fadd <vscale x 4 x bfloat> %va, %vb
   ret <vscale x 4 x bfloat> %vc
@@ -98,9 +98,9 @@ define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfadd.vf v10, v10, fa5
+; CHECK-NEXT:    vfadd.vf v8, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -115,9 +115,9 @@ define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfadd.vv v12, v16, v12
+; CHECK-NEXT:    vfadd.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fadd <vscale x 8 x bfloat> %va, %vb
   ret <vscale x 8 x bfloat> %vc
@@ -130,9 +130,9 @@ define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfadd.vf v12, v12, fa5
+; CHECK-NEXT:    vfadd.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -147,9 +147,9 @@ define <vscale x 8 x bfloat> @vfadd_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfadd.vf v12, v12, fa5
+; CHECK-NEXT:    vfadd.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -164,9 +164,9 @@ define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vv v16, v24, v16
+; CHECK-NEXT:    vfadd.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fadd <vscale x 16 x bfloat> %va, %vb
   ret <vscale x 16 x bfloat> %vc
@@ -179,9 +179,9 @@ define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vf v16, v16, fa5
+; CHECK-NEXT:    vfadd.vf v8, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -207,9 +207,9 @@ define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vv v0, v0, v8
+; CHECK-NEXT:    vfadd.vv v8, v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfadd.vv v16, v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -248,9 +248,9 @@ define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vv v0, v8, v0
+; CHECK-NEXT:    vfadd.vv v8, v8, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfadd.vv v16, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -281,9 +281,9 @@ define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fadd <vscale x 1 x half> %va, %vb
   ret <vscale x 1 x half> %vc
@@ -302,9 +302,9 @@ define <vscale x 1 x half> @vfadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfadd.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -325,9 +325,9 @@ define <vscale x 2 x half> @vfadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fadd <vscale x 2 x half> %va, %vb
   ret <vscale x 2 x half> %vc
@@ -346,9 +346,9 @@ define <vscale x 2 x half> @vfadd_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfadd.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -369,9 +369,9 @@ define <vscale x 4 x half> @vfadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fadd <vscale x 4 x half> %va, %vb
   ret <vscale x 4 x half> %vc
@@ -390,9 +390,9 @@ define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vf v10, v10, fa5
+; ZVFHMIN-NEXT:    vfadd.vf v8, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -413,9 +413,9 @@ define <vscale x 8 x half> @vfadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fadd <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x half> %vc
@@ -434,9 +434,9 @@ define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfadd.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -457,9 +457,9 @@ define <vscale x 8 x half> @vfadd_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfadd.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -480,9 +480,9 @@ define <vscale x 16 x half> @vfadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fadd <vscale x 16 x half> %va, %vb
   ret <vscale x 16 x half> %vc
@@ -501,9 +501,9 @@ define <vscale x 16 x half> @vfadd_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vf v16, v16, fa5
+; ZVFHMIN-NEXT:    vfadd.vf v8, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -535,9 +535,9 @@ define <vscale x 32 x half> @vfadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v16, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -582,9 +582,9 @@ define <vscale x 32 x half> @vfadd_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v16, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
index 32e3d6bc663be..93d281541607e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
@@ -21,9 +21,9 @@ define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -36,9 +36,9 @@ define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    vfadd.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -53,9 +53,9 @@ define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v10, v8, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -72,9 +72,9 @@ define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_commute(<vscale x 1 x bfloat> %v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -91,9 +91,9 @@ define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v10, v8
+; CHECK-NEXT:    vfadd.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -110,9 +110,9 @@ define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_unmasked_commute(<vscale x 1 x b
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v8, v10
+; CHECK-NEXT:    vfadd.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -129,9 +129,9 @@ define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -144,9 +144,9 @@ define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    vfadd.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -161,9 +161,9 @@ define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v10, v8, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -180,9 +180,9 @@ define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v10, v8
+; CHECK-NEXT:    vfadd.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -199,9 +199,9 @@ define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfadd.vv v10, v12, v10, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v12, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -214,9 +214,9 @@ define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfadd.vv v10, v12, v10
+; CHECK-NEXT:    vfadd.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -227,13 +227,13 @@ define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a1
+; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfadd.vv v10, v10, v8, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -246,13 +246,13 @@ define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a1
+; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfadd.vv v10, v10, v8
+; CHECK-NEXT:    vfadd.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -269,9 +269,9 @@ define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfadd.vv v12, v16, v12, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -284,9 +284,9 @@ define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfadd.vv v12, v16, v12
+; CHECK-NEXT:    vfadd.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -297,13 +297,13 @@ define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a1
+; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfadd.vv v12, v12, v8, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -316,13 +316,13 @@ define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a1
+; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfadd.vv v12, v12, v8
+; CHECK-NEXT:    vfadd.vv v8, v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -339,9 +339,9 @@ define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vv v16, v24, v16, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -354,9 +354,9 @@ define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vv v16, v24, v16
+; CHECK-NEXT:    vfadd.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -367,13 +367,13 @@ define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v24, a1
+; CHECK-NEXT:    vmv.v.x v12, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -386,13 +386,13 @@ define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v24, a1
+; CHECK-NEXT:    vmv.v.x v12, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vv v16, v16, v8
+; CHECK-NEXT:    vfadd.vv v8, v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -521,7 +521,7 @@ define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    vmv.v.x v24, a1
+; CHECK-NEXT:    vmv.v.x v16, a1
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a0, a1
@@ -534,9 +534,9 @@ define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    add a3, sp, a3
 ; CHECK-NEXT:    addi a3, a3, 16
-; CHECK-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v20, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfadd.vv v16, v24, v16, v0.t
@@ -588,20 +588,20 @@ define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmset.m v24
-; CHECK-NEXT:    vmv.v.x v16, a1
+; CHECK-NEXT:    vmset.m v16
+; CHECK-NEXT:    vmv.v.x v24, a1
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v0, v24, a2
+; CHECK-NEXT:    vslidedown.vx v0, v16, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
 ; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v28, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfadd.vv v16, v16, v24, v0.t
@@ -647,9 +647,9 @@ define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x half> %v
@@ -668,9 +668,9 @@ define <vscale x 1 x half> @vfadd_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x half> %v
@@ -691,9 +691,9 @@ define <vscale x 1 x half> @vfadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -716,9 +716,9 @@ define <vscale x 1 x half> @vfadd_vf_nxv1f16_commute(<vscale x 1 x half> %va, ha
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -741,9 +741,9 @@ define <vscale x 1 x half> @vfadd_vf_nxv1f16_unmasked(<vscale x 1 x half> %va, h
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -766,9 +766,9 @@ define <vscale x 1 x half> @vfadd_vf_nxv1f16_unmasked_commute(<vscale x 1 x half
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -791,9 +791,9 @@ define <vscale x 2 x half> @vfadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -812,9 +812,9 @@ define <vscale x 2 x half> @vfadd_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -835,9 +835,9 @@ define <vscale x 2 x half> @vfadd_vf_nxv2f16(<vscale x 2 x half> %va, half %b, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -860,9 +860,9 @@ define <vscale x 2 x half> @vfadd_vf_nxv2f16_unmasked(<vscale x 2 x half> %va, h
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -885,9 +885,9 @@ define <vscale x 4 x half> @vfadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x half> %v
@@ -906,9 +906,9 @@ define <vscale x 4 x half> @vfadd_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x half> %v
@@ -925,13 +925,13 @@ define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b, <
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v10, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -950,13 +950,13 @@ define <vscale x 4 x half> @vfadd_vf_nxv4f16_unmasked(<vscale x 4 x half> %va, h
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -979,9 +979,9 @@ define <vscale x 8 x half> @vfadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -1000,9 +1000,9 @@ define <vscale x 8 x half> @vfadd_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x half> %v
@@ -1019,13 +1019,13 @@ define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v12, v8, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1044,13 +1044,13 @@ define <vscale x 8 x half> @vfadd_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, h
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1073,9 +1073,9 @@ define <vscale x 16 x half> @vfadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -1094,9 +1094,9 @@ define <vscale x 16 x half> @vfadd_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x half> %v
@@ -1113,13 +1113,13 @@ define <vscale x 16 x half> @vfadd_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v16, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfadd.vv v8, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1138,13 +1138,13 @@ define <vscale x 16 x half> @vfadd_vf_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vfadd.vv v8, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1291,7 +1291,7 @@ define <vscale x 32 x half> @vfadd_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -1304,9 +1304,9 @@ define <vscale x 32 x half> @vfadd_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    slli a3, a3, 3
 ; ZVFHMIN-NEXT:    add a3, sp, a3
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v16, v24, v16, v0.t
@@ -1364,20 +1364,20 @@ define <vscale x 32 x half> @vfadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v24
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmset.m v16
+; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v24, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
 ; ZVFHMIN-NEXT:    addi a3, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v16, v16, v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll
index ec6ab422d6405..0c5bb88d974a0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll
@@ -1944,10 +1944,8 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1958,12 +1956,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1976,12 +1972,10 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1995,10 +1989,8 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2009,12 +2001,10 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2027,12 +2017,10 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2046,10 +2034,8 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2060,12 +2046,10 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2078,12 +2062,10 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2097,10 +2079,8 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2111,12 +2091,10 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2129,12 +2107,10 @@ define <vscale x 8 x i1> @fcmp_ole_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2148,12 +2124,11 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2165,13 +2140,12 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v12, v13
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2185,13 +2159,12 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v13, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2203,9 +2176,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vv v10, v10, v10
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2216,9 +2189,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2231,9 +2204,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2247,12 +2220,11 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2264,13 +2236,12 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v12, v13
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2284,13 +2255,12 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v13, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2304,10 +2274,9 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2318,12 +2287,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2336,12 +2304,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2355,10 +2322,9 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2369,12 +2335,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2387,12 +2352,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2406,10 +2370,9 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2420,12 +2383,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2438,12 +2400,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2457,10 +2418,9 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2471,12 +2431,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2489,12 +2448,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2540,9 +2498,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v12, v10, v10
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vv v10, v10, v10
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2553,9 +2511,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vf v10, v10, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2568,9 +2526,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v10
+; CHECK-NEXT:    vmfne.vf v10, v10, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2619,10 +2577,8 @@ define <vscale x 16 x i1> @fcmp_ogt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2633,12 +2589,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2651,12 +2605,10 @@ define <vscale x 16 x i1> @fcmp_ogt_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2670,10 +2622,8 @@ define <vscale x 16 x i1> @fcmp_oge_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2684,12 +2634,10 @@ define <vscale x 16 x i1> @fcmp_oge_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2702,12 +2650,10 @@ define <vscale x 16 x i1> @fcmp_oge_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2721,10 +2667,8 @@ define <vscale x 16 x i1> @fcmp_olt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2735,12 +2679,10 @@ define <vscale x 16 x i1> @fcmp_olt_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2753,12 +2695,10 @@ define <vscale x 16 x i1> @fcmp_olt_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2772,10 +2712,8 @@ define <vscale x 16 x i1> @fcmp_ole_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2786,12 +2724,10 @@ define <vscale x 16 x i1> @fcmp_ole_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2804,12 +2740,10 @@ define <vscale x 16 x i1> @fcmp_ole_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2823,12 +2757,11 @@ define <vscale x 16 x i1> @fcmp_one_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2839,14 +2772,13 @@ define <vscale x 16 x i1> @fcmp_one_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2859,14 +2791,13 @@ define <vscale x 16 x i1> @fcmp_one_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2878,9 +2809,9 @@ define <vscale x 16 x i1> @fcmp_ord_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ord_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v12, v12, v12
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2891,9 +2822,9 @@ define <vscale x 16 x i1> @fcmp_ord_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2906,9 +2837,9 @@ define <vscale x 16 x i1> @fcmp_ord_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2922,12 +2853,11 @@ define <vscale x 16 x i1> @fcmp_ueq_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2938,14 +2868,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2958,14 +2887,13 @@ define <vscale x 16 x i1> @fcmp_ueq_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2979,10 +2907,9 @@ define <vscale x 16 x i1> @fcmp_ugt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2993,12 +2920,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3011,12 +2937,11 @@ define <vscale x 16 x i1> @fcmp_ugt_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3030,10 +2955,9 @@ define <vscale x 16 x i1> @fcmp_uge_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -3044,12 +2968,11 @@ define <vscale x 16 x i1> @fcmp_uge_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3062,12 +2985,11 @@ define <vscale x 16 x i1> @fcmp_uge_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3081,10 +3003,9 @@ define <vscale x 16 x i1> @fcmp_ult_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -3095,12 +3016,11 @@ define <vscale x 16 x i1> @fcmp_ult_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3113,12 +3033,11 @@ define <vscale x 16 x i1> @fcmp_ult_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3132,10 +3051,9 @@ define <vscale x 16 x i1> @fcmp_ule_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -3146,12 +3064,11 @@ define <vscale x 16 x i1> @fcmp_ule_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3164,12 +3081,11 @@ define <vscale x 16 x i1> @fcmp_ule_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3215,9 +3131,9 @@ define <vscale x 16 x i1> @fcmp_uno_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_uno_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v12, v12, v12
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -3228,9 +3144,9 @@ define <vscale x 16 x i1> @fcmp_uno_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3243,9 +3159,9 @@ define <vscale x 16 x i1> @fcmp_uno_fv_nxv16f16(<vscale x 16 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -3294,10 +3210,8 @@ define <vscale x 32 x i1> @fcmp_ogt_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3308,12 +3222,10 @@ define <vscale x 32 x i1> @fcmp_ogt_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3326,12 +3238,10 @@ define <vscale x 32 x i1> @fcmp_ogt_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3345,10 +3255,8 @@ define <vscale x 32 x i1> @fcmp_oge_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3359,12 +3267,10 @@ define <vscale x 32 x i1> @fcmp_oge_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3377,12 +3283,10 @@ define <vscale x 32 x i1> @fcmp_oge_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3396,10 +3300,8 @@ define <vscale x 32 x i1> @fcmp_olt_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3410,12 +3312,10 @@ define <vscale x 32 x i1> @fcmp_olt_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3428,12 +3328,10 @@ define <vscale x 32 x i1> @fcmp_olt_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3447,10 +3345,8 @@ define <vscale x 32 x i1> @fcmp_ole_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3461,12 +3357,10 @@ define <vscale x 32 x i1> @fcmp_ole_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3479,12 +3373,10 @@ define <vscale x 32 x i1> @fcmp_ole_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3498,12 +3390,11 @@ define <vscale x 32 x i1> @fcmp_one_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v25, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v24, v25
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3514,14 +3405,13 @@ define <vscale x 32 x i1> @fcmp_one_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3534,14 +3424,13 @@ define <vscale x 32 x i1> @fcmp_one_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3553,9 +3442,9 @@ define <vscale x 32 x i1> @fcmp_ord_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_ord_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vv v16, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3566,9 +3455,9 @@ define <vscale x 32 x i1> @fcmp_ord_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3581,9 +3470,9 @@ define <vscale x 32 x i1> @fcmp_ord_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3597,12 +3486,11 @@ define <vscale x 32 x i1> @fcmp_ueq_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v25, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v24, v25
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3613,14 +3501,13 @@ define <vscale x 32 x i1> @fcmp_ueq_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3633,14 +3520,13 @@ define <vscale x 32 x i1> @fcmp_ueq_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3654,10 +3540,9 @@ define <vscale x 32 x i1> @fcmp_ugt_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3668,12 +3553,11 @@ define <vscale x 32 x i1> @fcmp_ugt_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3686,12 +3570,11 @@ define <vscale x 32 x i1> @fcmp_ugt_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3705,10 +3588,9 @@ define <vscale x 32 x i1> @fcmp_uge_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3719,12 +3601,11 @@ define <vscale x 32 x i1> @fcmp_uge_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3737,12 +3618,11 @@ define <vscale x 32 x i1> @fcmp_uge_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3756,10 +3636,9 @@ define <vscale x 32 x i1> @fcmp_ult_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3770,12 +3649,11 @@ define <vscale x 32 x i1> @fcmp_ult_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3788,12 +3666,11 @@ define <vscale x 32 x i1> @fcmp_ult_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3807,10 +3684,9 @@ define <vscale x 32 x i1> @fcmp_ule_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3821,12 +3697,11 @@ define <vscale x 32 x i1> @fcmp_ule_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3839,12 +3714,11 @@ define <vscale x 32 x i1> @fcmp_ule_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3890,9 +3764,9 @@ define <vscale x 32 x i1> @fcmp_uno_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; CHECK-LABEL: fcmp_uno_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v16, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vv v16, v16, v16
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3903,9 +3777,9 @@ define <vscale x 32 x i1> @fcmp_uno_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3918,9 +3792,9 @@ define <vscale x 32 x i1> @fcmp_uno_fv_nxv32f16(<vscale x 32 x half> %va, half %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v24, v16
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -5235,10 +5109,8 @@ define <vscale x 4 x i1> @fcmp_ogt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5249,12 +5121,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5267,12 +5137,10 @@ define <vscale x 4 x i1> @fcmp_ogt_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5286,10 +5154,8 @@ define <vscale x 4 x i1> @fcmp_oge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5300,12 +5166,10 @@ define <vscale x 4 x i1> @fcmp_oge_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5318,12 +5182,10 @@ define <vscale x 4 x i1> @fcmp_oge_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5337,10 +5199,8 @@ define <vscale x 4 x i1> @fcmp_olt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5351,12 +5211,10 @@ define <vscale x 4 x i1> @fcmp_olt_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5369,12 +5227,10 @@ define <vscale x 4 x i1> @fcmp_olt_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5388,10 +5244,8 @@ define <vscale x 4 x i1> @fcmp_ole_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5402,12 +5256,10 @@ define <vscale x 4 x i1> @fcmp_ole_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5420,12 +5272,10 @@ define <vscale x 4 x i1> @fcmp_ole_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5439,12 +5289,11 @@ define <vscale x 4 x i1> @fcmp_one_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5456,13 +5305,12 @@ define <vscale x 4 x i1> @fcmp_one_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v12, v13
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5476,13 +5324,12 @@ define <vscale x 4 x i1> @fcmp_one_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v13, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5494,9 +5341,9 @@ define <vscale x 4 x i1> @fcmp_ord_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vv v10, v10, v10
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5507,9 +5354,9 @@ define <vscale x 4 x i1> @fcmp_ord_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5522,9 +5369,9 @@ define <vscale x 4 x i1> @fcmp_ord_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5538,12 +5385,11 @@ define <vscale x 4 x i1> @fcmp_ueq_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5555,13 +5401,12 @@ define <vscale x 4 x i1> @fcmp_ueq_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v12, v13
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5575,13 +5420,12 @@ define <vscale x 4 x i1> @fcmp_ueq_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v13, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5595,10 +5439,9 @@ define <vscale x 4 x i1> @fcmp_ugt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5609,12 +5452,11 @@ define <vscale x 4 x i1> @fcmp_ugt_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5627,12 +5469,11 @@ define <vscale x 4 x i1> @fcmp_ugt_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5646,10 +5487,9 @@ define <vscale x 4 x i1> @fcmp_uge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5660,12 +5500,11 @@ define <vscale x 4 x i1> @fcmp_uge_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5678,12 +5517,11 @@ define <vscale x 4 x i1> @fcmp_uge_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5697,10 +5535,9 @@ define <vscale x 4 x i1> @fcmp_ult_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5711,12 +5548,11 @@ define <vscale x 4 x i1> @fcmp_ult_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5729,12 +5565,11 @@ define <vscale x 4 x i1> @fcmp_ult_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5748,10 +5583,9 @@ define <vscale x 4 x i1> @fcmp_ule_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5762,12 +5596,11 @@ define <vscale x 4 x i1> @fcmp_ule_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5780,12 +5613,11 @@ define <vscale x 4 x i1> @fcmp_ule_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5831,9 +5663,9 @@ define <vscale x 4 x i1> @fcmp_uno_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v12, v10, v10
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vv v10, v10, v10
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -5844,9 +5676,9 @@ define <vscale x 4 x i1> @fcmp_uno_vf_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vf v10, v10, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5859,9 +5691,9 @@ define <vscale x 4 x i1> @fcmp_uno_fv_nxv4f32(<vscale x 4 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v10
+; CHECK-NEXT:    vmfne.vf v10, v10, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -5910,10 +5742,8 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5924,12 +5754,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5942,12 +5770,10 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5961,10 +5787,8 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5975,12 +5799,10 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5993,12 +5815,10 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6012,10 +5832,8 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6026,12 +5844,10 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6044,12 +5860,10 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6063,10 +5877,8 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6077,12 +5889,10 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6095,12 +5905,10 @@ define <vscale x 8 x i1> @fcmp_ole_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6114,12 +5922,11 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6130,14 +5937,13 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6150,14 +5956,13 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6169,9 +5974,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v12, v12, v12
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6182,9 +5987,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6197,9 +6002,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6213,12 +6018,11 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6229,14 +6033,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6249,14 +6052,13 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6270,10 +6072,9 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6284,12 +6085,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6302,12 +6102,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6321,10 +6120,9 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6335,12 +6133,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6353,12 +6150,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6372,10 +6168,9 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6386,12 +6181,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6404,12 +6198,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6423,10 +6216,9 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6437,12 +6229,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6455,12 +6246,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6506,9 +6296,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v12, v12, v12
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -6519,9 +6309,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6534,9 +6324,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f32(<vscale x 8 x float> %va, float %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -6585,10 +6375,8 @@ define <vscale x 16 x i1> @fcmp_ogt_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6599,12 +6387,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6617,12 +6403,10 @@ define <vscale x 16 x i1> @fcmp_ogt_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6636,10 +6420,8 @@ define <vscale x 16 x i1> @fcmp_oge_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6650,12 +6432,10 @@ define <vscale x 16 x i1> @fcmp_oge_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6668,12 +6448,10 @@ define <vscale x 16 x i1> @fcmp_oge_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6687,10 +6465,8 @@ define <vscale x 16 x i1> @fcmp_olt_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6701,12 +6477,10 @@ define <vscale x 16 x i1> @fcmp_olt_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6719,12 +6493,10 @@ define <vscale x 16 x i1> @fcmp_olt_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6738,10 +6510,8 @@ define <vscale x 16 x i1> @fcmp_ole_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6752,12 +6522,10 @@ define <vscale x 16 x i1> @fcmp_ole_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6770,12 +6538,10 @@ define <vscale x 16 x i1> @fcmp_ole_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6789,12 +6555,11 @@ define <vscale x 16 x i1> @fcmp_one_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v25, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v24, v25
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6805,14 +6570,13 @@ define <vscale x 16 x i1> @fcmp_one_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6825,14 +6589,13 @@ define <vscale x 16 x i1> @fcmp_one_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6844,9 +6607,9 @@ define <vscale x 16 x i1> @fcmp_ord_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_ord_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vv v16, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6857,9 +6620,9 @@ define <vscale x 16 x i1> @fcmp_ord_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6872,9 +6635,9 @@ define <vscale x 16 x i1> @fcmp_ord_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6888,12 +6651,11 @@ define <vscale x 16 x i1> @fcmp_ueq_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v25, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v24, v25
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6904,14 +6666,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6924,14 +6685,13 @@ define <vscale x 16 x i1> @fcmp_ueq_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6945,10 +6705,9 @@ define <vscale x 16 x i1> @fcmp_ugt_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -6959,12 +6718,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6977,12 +6735,11 @@ define <vscale x 16 x i1> @fcmp_ugt_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6996,10 +6753,9 @@ define <vscale x 16 x i1> @fcmp_uge_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -7010,12 +6766,11 @@ define <vscale x 16 x i1> @fcmp_uge_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7028,12 +6783,11 @@ define <vscale x 16 x i1> @fcmp_uge_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7047,10 +6801,9 @@ define <vscale x 16 x i1> @fcmp_ult_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -7061,12 +6814,11 @@ define <vscale x 16 x i1> @fcmp_ult_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7079,12 +6831,11 @@ define <vscale x 16 x i1> @fcmp_ult_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7098,10 +6849,9 @@ define <vscale x 16 x i1> @fcmp_ule_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -7112,12 +6862,11 @@ define <vscale x 16 x i1> @fcmp_ule_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7130,12 +6879,11 @@ define <vscale x 16 x i1> @fcmp_ule_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7181,9 +6929,9 @@ define <vscale x 16 x i1> @fcmp_uno_vv_nxv16f32(<vscale x 16 x float> %va, <vsca
 ; CHECK-LABEL: fcmp_uno_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v16, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vv v16, v16, v16
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -7194,9 +6942,9 @@ define <vscale x 16 x i1> @fcmp_uno_vf_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7209,9 +6957,9 @@ define <vscale x 16 x i1> @fcmp_uno_fv_nxv16f32(<vscale x 16 x float> %va, float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v24, v16
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -7893,10 +7641,8 @@ define <vscale x 2 x i1> @fcmp_ogt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -7907,12 +7653,10 @@ define <vscale x 2 x i1> @fcmp_ogt_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -7925,12 +7669,10 @@ define <vscale x 2 x i1> @fcmp_ogt_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -7944,10 +7686,8 @@ define <vscale x 2 x i1> @fcmp_oge_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -7958,12 +7698,10 @@ define <vscale x 2 x i1> @fcmp_oge_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -7976,12 +7714,10 @@ define <vscale x 2 x i1> @fcmp_oge_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -7995,10 +7731,8 @@ define <vscale x 2 x i1> @fcmp_olt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8009,12 +7743,10 @@ define <vscale x 2 x i1> @fcmp_olt_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8027,12 +7759,10 @@ define <vscale x 2 x i1> @fcmp_olt_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8046,10 +7776,8 @@ define <vscale x 2 x i1> @fcmp_ole_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8060,12 +7788,10 @@ define <vscale x 2 x i1> @fcmp_ole_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8078,12 +7804,10 @@ define <vscale x 2 x i1> @fcmp_ole_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8097,12 +7821,11 @@ define <vscale x 2 x i1> @fcmp_one_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8114,13 +7837,12 @@ define <vscale x 2 x i1> @fcmp_one_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v12, v13
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8134,13 +7856,12 @@ define <vscale x 2 x i1> @fcmp_one_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v13, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8152,9 +7873,9 @@ define <vscale x 2 x i1> @fcmp_ord_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v12, v10, v10
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vv v10, v10, v10
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8165,9 +7886,9 @@ define <vscale x 2 x i1> @fcmp_ord_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8180,9 +7901,9 @@ define <vscale x 2 x i1> @fcmp_ord_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8196,12 +7917,11 @@ define <vscale x 2 x i1> @fcmp_ueq_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8213,13 +7933,12 @@ define <vscale x 2 x i1> @fcmp_ueq_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v12, v13
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8233,13 +7952,12 @@ define <vscale x 2 x i1> @fcmp_ueq_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmfeq.vf v13, v10, fa0
-; CHECK-NEXT:    vmand.mm v10, v13, v12
-; CHECK-NEXT:    vmv1r.v v11, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v10, v11
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmv1r.v v10, v0
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8253,10 +7971,9 @@ define <vscale x 2 x i1> @fcmp_ugt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8267,12 +7984,11 @@ define <vscale x 2 x i1> @fcmp_ugt_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8285,12 +8001,11 @@ define <vscale x 2 x i1> @fcmp_ugt_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8304,10 +8019,9 @@ define <vscale x 2 x i1> @fcmp_uge_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v10, v10
 ; CHECK-NEXT:    vmfeq.vv v13, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8318,12 +8032,11 @@ define <vscale x 2 x i1> @fcmp_uge_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8336,12 +8049,11 @@ define <vscale x 2 x i1> @fcmp_uge_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8355,10 +8067,9 @@ define <vscale x 2 x i1> @fcmp_ult_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8369,12 +8080,11 @@ define <vscale x 2 x i1> @fcmp_ult_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8387,12 +8097,11 @@ define <vscale x 2 x i1> @fcmp_ult_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8406,10 +8115,9 @@ define <vscale x 2 x i1> @fcmp_ule_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v12, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v13, v10, v10
-; CHECK-NEXT:    vmand.mm v12, v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8420,12 +8128,11 @@ define <vscale x 2 x i1> @fcmp_ule_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v12, v10
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v11
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8438,12 +8145,11 @@ define <vscale x 2 x i1> @fcmp_ule_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfeq.vf v12, v10, fa0
-; CHECK-NEXT:    vmfeq.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v10, v10, v12
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfeq.vf v10, v10, fa0
+; CHECK-NEXT:    vmfeq.vv v11, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8489,9 +8195,9 @@ define <vscale x 2 x i1> @fcmp_uno_vv_nxv2f64(<vscale x 2 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfne.vv v12, v10, v10
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vv v10, v10, v10
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -8502,9 +8208,9 @@ define <vscale x 2 x i1> @fcmp_uno_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v10, v12
+; CHECK-NEXT:    vmfne.vf v10, v10, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8517,9 +8223,9 @@ define <vscale x 2 x i1> @fcmp_uno_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfne.vf v12, v10, fa0
-; CHECK-NEXT:    vmfne.vv v10, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v10
+; CHECK-NEXT:    vmfne.vf v10, v10, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -8568,10 +8274,8 @@ define <vscale x 4 x i1> @fcmp_ogt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8582,12 +8286,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8600,12 +8302,10 @@ define <vscale x 4 x i1> @fcmp_ogt_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8619,10 +8319,8 @@ define <vscale x 4 x i1> @fcmp_oge_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8633,12 +8331,10 @@ define <vscale x 4 x i1> @fcmp_oge_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8651,12 +8347,10 @@ define <vscale x 4 x i1> @fcmp_oge_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8670,10 +8364,8 @@ define <vscale x 4 x i1> @fcmp_olt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8684,12 +8376,10 @@ define <vscale x 4 x i1> @fcmp_olt_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8702,12 +8392,10 @@ define <vscale x 4 x i1> @fcmp_olt_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8721,10 +8409,8 @@ define <vscale x 4 x i1> @fcmp_ole_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8735,12 +8421,10 @@ define <vscale x 4 x i1> @fcmp_ole_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8753,12 +8437,10 @@ define <vscale x 4 x i1> @fcmp_ole_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8772,12 +8454,11 @@ define <vscale x 4 x i1> @fcmp_one_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8788,14 +8469,13 @@ define <vscale x 4 x i1> @fcmp_one_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8808,14 +8488,13 @@ define <vscale x 4 x i1> @fcmp_one_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8827,9 +8506,9 @@ define <vscale x 4 x i1> @fcmp_ord_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v16, v12, v12
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vv v12, v12, v12
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8840,9 +8519,9 @@ define <vscale x 4 x i1> @fcmp_ord_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8855,9 +8534,9 @@ define <vscale x 4 x i1> @fcmp_ord_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8871,12 +8550,11 @@ define <vscale x 4 x i1> @fcmp_ueq_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v17, v8, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8887,14 +8565,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8907,14 +8584,13 @@ define <vscale x 4 x i1> @fcmp_ueq_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v13, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v12, v13
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmv1r.v v12, v0
+; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8928,10 +8604,9 @@ define <vscale x 4 x i1> @fcmp_ugt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8942,12 +8617,11 @@ define <vscale x 4 x i1> @fcmp_ugt_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8960,12 +8634,11 @@ define <vscale x 4 x i1> @fcmp_ugt_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -8979,10 +8652,9 @@ define <vscale x 4 x i1> @fcmp_uge_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v12, v12
 ; CHECK-NEXT:    vmfeq.vv v17, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v8, v12, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -8993,12 +8665,11 @@ define <vscale x 4 x i1> @fcmp_uge_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9011,12 +8682,11 @@ define <vscale x 4 x i1> @fcmp_uge_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9030,10 +8700,9 @@ define <vscale x 4 x i1> @fcmp_ult_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -9044,12 +8713,11 @@ define <vscale x 4 x i1> @fcmp_ult_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9062,12 +8730,11 @@ define <vscale x 4 x i1> @fcmp_ult_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9081,10 +8748,9 @@ define <vscale x 4 x i1> @fcmp_ule_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v16, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v17, v12, v12
-; CHECK-NEXT:    vmand.mm v16, v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v0, v12, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -9095,12 +8761,11 @@ define <vscale x 4 x i1> @fcmp_ule_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v16, v12
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v13
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9113,12 +8778,11 @@ define <vscale x 4 x i1> @fcmp_ule_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfeq.vf v16, v12, fa0
-; CHECK-NEXT:    vmfeq.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v12, v12, v16
-; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfeq.vf v12, v12, fa0
+; CHECK-NEXT:    vmfeq.vv v13, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9164,9 +8828,9 @@ define <vscale x 4 x i1> @fcmp_uno_vv_nxv4f64(<vscale x 4 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v16, v12, v12
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vv v12, v12, v12
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -9177,9 +8841,9 @@ define <vscale x 4 x i1> @fcmp_uno_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9192,9 +8856,9 @@ define <vscale x 4 x i1> @fcmp_uno_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfne.vf v16, v12, fa0
-; CHECK-NEXT:    vmfne.vv v12, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v12, v12, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -9243,10 +8907,8 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9257,12 +8919,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9275,12 +8935,10 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9294,10 +8952,8 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9308,12 +8964,10 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9326,12 +8980,10 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9345,10 +8997,8 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9359,12 +9009,10 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9377,12 +9025,10 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9396,10 +9042,8 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9410,12 +9054,10 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9428,12 +9070,10 @@ define <vscale x 8 x i1> @fcmp_ole_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9447,12 +9087,11 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v25, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmor.mm v0, v24, v25
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9463,14 +9102,13 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9483,14 +9121,13 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9502,9 +9139,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v24, v16, v16
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vv v16, v16, v16
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9515,9 +9152,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9530,9 +9167,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9546,12 +9183,11 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v25, v8, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v24, v25
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9562,14 +9198,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9582,14 +9217,13 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v17, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnor.mm v0, v16, v17
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmv1r.v v16, v0
+; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnor.mm v0, v0, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9603,10 +9237,9 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9617,12 +9250,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9635,12 +9267,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9654,10 +9285,9 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v16, v16
 ; CHECK-NEXT:    vmfeq.vv v25, v8, v8
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v8, v16, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9668,12 +9298,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9686,12 +9315,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9705,10 +9333,9 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmfle.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9719,12 +9346,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9737,12 +9363,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9756,10 +9381,9 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmfeq.vv v24, v8, v8
 ; CHECK-NEXT:    vmfeq.vv v25, v16, v16
-; CHECK-NEXT:    vmand.mm v24, v25, v24
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vmflt.vv v24, v16, v8, v0.t
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v0, v16, v8, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9770,12 +9394,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v24, v16
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v17
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9788,12 +9411,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfeq.vf v24, v16, fa0
-; CHECK-NEXT:    vmfeq.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v16, v16, v24
-; CHECK-NEXT:    vmv1r.v v0, v16
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0, v0.t
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfeq.vf v16, v16, fa0
+; CHECK-NEXT:    vmfeq.vv v17, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0, v0.t
+; CHECK-NEXT:    vmnot.m v0, v0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9839,9 +9461,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscale
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfne.vv v24, v16, v16
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vv v16, v16, v16
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -9852,9 +9474,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v16, v24
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -9867,9 +9489,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfne.vf v24, v16, fa0
-; CHECK-NEXT:    vmfne.vv v16, v8, v8
-; CHECK-NEXT:    vmor.mm v0, v24, v16
+; CHECK-NEXT:    vmfne.vf v16, v16, fa0
+; CHECK-NEXT:    vmfne.vv v8, v8, v8
+; CHECK-NEXT:    vmor.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll
index 2ca9dd24e915a..dc3187b0e23d6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll
@@ -1606,8 +1606,8 @@ define <vscale x 8 x i1> @fcmps_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1618,8 +1618,8 @@ define <vscale x 8 x i1> @fcmps_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1632,8 +1632,8 @@ define <vscale x 8 x i1> @fcmps_oeq_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1782,8 +1782,8 @@ define <vscale x 8 x i1> @fcmps_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1794,8 +1794,8 @@ define <vscale x 8 x i1> @fcmps_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1808,8 +1808,8 @@ define <vscale x 8 x i1> @fcmps_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1821,9 +1821,9 @@ define <vscale x 8 x i1> @fcmps_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_ord_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vv v10, v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1834,9 +1834,9 @@ define <vscale x 8 x i1> @fcmps_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1849,9 +1849,9 @@ define <vscale x 8 x i1> @fcmps_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1864,8 +1864,8 @@ define <vscale x 8 x i1> @fcmps_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1876,8 +1876,8 @@ define <vscale x 8 x i1> @fcmps_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1890,8 +1890,8 @@ define <vscale x 8 x i1> @fcmps_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1903,8 +1903,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_ugt_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1914,8 +1914,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ugt_vf_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1927,8 +1927,8 @@ define <vscale x 8 x i1> @fcmps_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ugt_fv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1940,8 +1940,8 @@ define <vscale x 8 x i1> @fcmps_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_uge_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v8, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1951,8 +1951,8 @@ define <vscale x 8 x i1> @fcmps_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_uge_vf_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1964,8 +1964,8 @@ define <vscale x 8 x i1> @fcmps_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_uge_fv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1977,8 +1977,8 @@ define <vscale x 8 x i1> @fcmps_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_ult_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v8, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -1988,8 +1988,8 @@ define <vscale x 8 x i1> @fcmps_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ult_vf_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2001,8 +2001,8 @@ define <vscale x 8 x i1> @fcmps_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ult_fv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2014,8 +2014,8 @@ define <vscale x 8 x i1> @fcmps_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_ule_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2025,8 +2025,8 @@ define <vscale x 8 x i1> @fcmps_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ule_vf_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2038,8 +2038,8 @@ define <vscale x 8 x i1> @fcmps_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK-LABEL: fcmps_ule_fv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2052,8 +2052,8 @@ define <vscale x 8 x i1> @fcmps_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2064,8 +2064,8 @@ define <vscale x 8 x i1> @fcmps_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2078,8 +2078,8 @@ define <vscale x 8 x i1> @fcmps_une_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2091,10 +2091,10 @@ define <vscale x 8 x i1> @fcmps_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; CHECK-LABEL: fcmps_uno_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vv v10, v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -2105,10 +2105,10 @@ define <vscale x 8 x i1> @fcmps_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v10, fa0
-; CHECK-NEXT:    vmnot.m v9, v12
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v10, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2121,10 +2121,10 @@ define <vscale x 8 x i1> @fcmps_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmnot.m v10, v12
-; CHECK-NEXT:    vmfle.vv v11, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v10, v11
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmnot.m v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2138,8 +2138,8 @@ define <vscale x 16 x i1> @fcmps_oeq_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2150,8 +2150,8 @@ define <vscale x 16 x i1> @fcmps_oeq_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2164,8 +2164,8 @@ define <vscale x 16 x i1> @fcmps_oeq_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2314,8 +2314,8 @@ define <vscale x 16 x i1> @fcmps_one_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2326,8 +2326,8 @@ define <vscale x 16 x i1> @fcmps_one_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2340,8 +2340,8 @@ define <vscale x 16 x i1> @fcmps_one_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2353,9 +2353,9 @@ define <vscale x 16 x i1> @fcmps_ord_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ord_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vv v12, v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2366,9 +2366,9 @@ define <vscale x 16 x i1> @fcmps_ord_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2381,9 +2381,9 @@ define <vscale x 16 x i1> @fcmps_ord_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2396,8 +2396,8 @@ define <vscale x 16 x i1> @fcmps_ueq_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2408,8 +2408,8 @@ define <vscale x 16 x i1> @fcmps_ueq_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2422,8 +2422,8 @@ define <vscale x 16 x i1> @fcmps_ueq_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2435,8 +2435,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ugt_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2446,8 +2446,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ugt_vf_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2459,8 +2459,8 @@ define <vscale x 16 x i1> @fcmps_ugt_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ugt_fv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2472,8 +2472,8 @@ define <vscale x 16 x i1> @fcmps_uge_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_uge_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2483,8 +2483,8 @@ define <vscale x 16 x i1> @fcmps_uge_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_uge_vf_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2496,8 +2496,8 @@ define <vscale x 16 x i1> @fcmps_uge_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_uge_fv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2509,8 +2509,8 @@ define <vscale x 16 x i1> @fcmps_ult_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ult_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2520,8 +2520,8 @@ define <vscale x 16 x i1> @fcmps_ult_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ult_vf_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2533,8 +2533,8 @@ define <vscale x 16 x i1> @fcmps_ult_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ult_fv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2546,8 +2546,8 @@ define <vscale x 16 x i1> @fcmps_ule_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ule_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2557,8 +2557,8 @@ define <vscale x 16 x i1> @fcmps_ule_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ule_vf_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2570,8 +2570,8 @@ define <vscale x 16 x i1> @fcmps_ule_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK-LABEL: fcmps_ule_fv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2584,8 +2584,8 @@ define <vscale x 16 x i1> @fcmps_une_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2596,8 +2596,8 @@ define <vscale x 16 x i1> @fcmps_une_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2610,8 +2610,8 @@ define <vscale x 16 x i1> @fcmps_une_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2623,10 +2623,10 @@ define <vscale x 16 x i1> @fcmps_uno_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_uno_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v12, v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -2637,10 +2637,10 @@ define <vscale x 16 x i1> @fcmps_uno_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v12, fa0
-; CHECK-NEXT:    vmnot.m v9, v16
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v12, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2653,10 +2653,10 @@ define <vscale x 16 x i1> @fcmps_uno_fv_nxv16f16(<vscale x 16 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmnot.m v12, v16
-; CHECK-NEXT:    vmfle.vv v13, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v12, v13
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmnot.m v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2670,8 +2670,8 @@ define <vscale x 32 x i1> @fcmps_oeq_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmfle.vv v25, v8, v16
-; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmand.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -2682,8 +2682,8 @@ define <vscale x 32 x i1> @fcmps_oeq_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmfle.vf v17, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2696,8 +2696,8 @@ define <vscale x 32 x i1> @fcmps_oeq_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmfge.vf v17, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2846,8 +2846,8 @@ define <vscale x 32 x i1> @fcmps_one_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -2858,8 +2858,8 @@ define <vscale x 32 x i1> @fcmps_one_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2872,8 +2872,8 @@ define <vscale x 32 x i1> @fcmps_one_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2885,9 +2885,9 @@ define <vscale x 32 x i1> @fcmps_ord_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ord_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfle.vv v16, v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -2898,9 +2898,9 @@ define <vscale x 32 x i1> @fcmps_ord_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfle.vf v16, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2913,9 +2913,9 @@ define <vscale x 32 x i1> @fcmps_ord_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfle.vf v16, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2928,8 +2928,8 @@ define <vscale x 32 x i1> @fcmps_ueq_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmnor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -2940,8 +2940,8 @@ define <vscale x 32 x i1> @fcmps_ueq_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2954,8 +2954,8 @@ define <vscale x 32 x i1> @fcmps_ueq_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2967,8 +2967,8 @@ define <vscale x 32 x i1> @fcmps_ugt_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ugt_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -2978,8 +2978,8 @@ define <vscale x 32 x i1> @fcmps_ugt_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ugt_vf_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -2991,8 +2991,8 @@ define <vscale x 32 x i1> @fcmps_ugt_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ugt_fv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3004,8 +3004,8 @@ define <vscale x 32 x i1> @fcmps_uge_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_uge_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v8, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3015,8 +3015,8 @@ define <vscale x 32 x i1> @fcmps_uge_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_uge_vf_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3028,8 +3028,8 @@ define <vscale x 32 x i1> @fcmps_uge_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_uge_fv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3041,8 +3041,8 @@ define <vscale x 32 x i1> @fcmps_ult_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ult_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v8, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3052,8 +3052,8 @@ define <vscale x 32 x i1> @fcmps_ult_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ult_vf_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3065,8 +3065,8 @@ define <vscale x 32 x i1> @fcmps_ult_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ult_fv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3078,8 +3078,8 @@ define <vscale x 32 x i1> @fcmps_ule_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_ule_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3089,8 +3089,8 @@ define <vscale x 32 x i1> @fcmps_ule_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ule_vf_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3102,8 +3102,8 @@ define <vscale x 32 x i1> @fcmps_ule_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK-LABEL: fcmps_ule_fv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3116,8 +3116,8 @@ define <vscale x 32 x i1> @fcmps_une_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmfle.vv v25, v8, v16
-; CHECK-NEXT:    vmnand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmnand.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3128,8 +3128,8 @@ define <vscale x 32 x i1> @fcmps_une_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmfle.vf v17, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3142,8 +3142,8 @@ define <vscale x 32 x i1> @fcmps_une_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmfge.vf v17, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3155,10 +3155,10 @@ define <vscale x 32 x i1> @fcmps_uno_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; CHECK-LABEL: fcmps_uno_vv_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v16
-; CHECK-NEXT:    vmorn.mm v0, v8, v24
+; CHECK-NEXT:    vmfle.vv v16, v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 32 x i1> %1
@@ -3169,10 +3169,10 @@ define <vscale x 32 x i1> @fcmps_uno_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vv v24, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v16, fa0
-; CHECK-NEXT:    vmnot.m v9, v24
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v16, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -3185,10 +3185,10 @@ define <vscale x 32 x i1> @fcmps_uno_fv_nxv32f16(<vscale x 32 x half> %va, half
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmnot.m v16, v24
-; CHECK-NEXT:    vmfle.vv v17, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v16, v17
+; CHECK-NEXT:    vmfle.vf v16, v16, fa0
+; CHECK-NEXT:    vmnot.m v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -4266,8 +4266,8 @@ define <vscale x 4 x i1> @fcmps_oeq_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4278,8 +4278,8 @@ define <vscale x 4 x i1> @fcmps_oeq_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4292,8 +4292,8 @@ define <vscale x 4 x i1> @fcmps_oeq_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4442,8 +4442,8 @@ define <vscale x 4 x i1> @fcmps_one_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4454,8 +4454,8 @@ define <vscale x 4 x i1> @fcmps_one_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4468,8 +4468,8 @@ define <vscale x 4 x i1> @fcmps_one_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4481,9 +4481,9 @@ define <vscale x 4 x i1> @fcmps_ord_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ord_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vv v10, v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4494,9 +4494,9 @@ define <vscale x 4 x i1> @fcmps_ord_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4509,9 +4509,9 @@ define <vscale x 4 x i1> @fcmps_ord_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4524,8 +4524,8 @@ define <vscale x 4 x i1> @fcmps_ueq_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4536,8 +4536,8 @@ define <vscale x 4 x i1> @fcmps_ueq_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4550,8 +4550,8 @@ define <vscale x 4 x i1> @fcmps_ueq_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4563,8 +4563,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ugt_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4574,8 +4574,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ugt_vf_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4587,8 +4587,8 @@ define <vscale x 4 x i1> @fcmps_ugt_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ugt_fv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4600,8 +4600,8 @@ define <vscale x 4 x i1> @fcmps_uge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_uge_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v8, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4611,8 +4611,8 @@ define <vscale x 4 x i1> @fcmps_uge_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_uge_vf_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4624,8 +4624,8 @@ define <vscale x 4 x i1> @fcmps_uge_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_uge_fv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4637,8 +4637,8 @@ define <vscale x 4 x i1> @fcmps_ult_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ult_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v8, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4648,8 +4648,8 @@ define <vscale x 4 x i1> @fcmps_ult_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ult_vf_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4661,8 +4661,8 @@ define <vscale x 4 x i1> @fcmps_ult_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ult_fv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4674,8 +4674,8 @@ define <vscale x 4 x i1> @fcmps_ule_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ule_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4685,8 +4685,8 @@ define <vscale x 4 x i1> @fcmps_ule_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ule_vf_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4698,8 +4698,8 @@ define <vscale x 4 x i1> @fcmps_ule_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK-LABEL: fcmps_ule_fv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4712,8 +4712,8 @@ define <vscale x 4 x i1> @fcmps_une_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4724,8 +4724,8 @@ define <vscale x 4 x i1> @fcmps_une_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4738,8 +4738,8 @@ define <vscale x 4 x i1> @fcmps_une_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4751,10 +4751,10 @@ define <vscale x 4 x i1> @fcmps_uno_vv_nxv4f32(<vscale x 4 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_uno_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vv v10, v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -4765,10 +4765,10 @@ define <vscale x 4 x i1> @fcmps_uno_vf_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v10, fa0
-; CHECK-NEXT:    vmnot.m v9, v12
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v10, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4781,10 +4781,10 @@ define <vscale x 4 x i1> @fcmps_uno_fv_nxv4f32(<vscale x 4 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmnot.m v10, v12
-; CHECK-NEXT:    vmfle.vv v11, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v10, v11
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmnot.m v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -4798,8 +4798,8 @@ define <vscale x 8 x i1> @fcmps_oeq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -4810,8 +4810,8 @@ define <vscale x 8 x i1> @fcmps_oeq_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -4824,8 +4824,8 @@ define <vscale x 8 x i1> @fcmps_oeq_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -4974,8 +4974,8 @@ define <vscale x 8 x i1> @fcmps_one_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -4986,8 +4986,8 @@ define <vscale x 8 x i1> @fcmps_one_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5000,8 +5000,8 @@ define <vscale x 8 x i1> @fcmps_one_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5013,9 +5013,9 @@ define <vscale x 8 x i1> @fcmps_ord_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ord_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vv v12, v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5026,9 +5026,9 @@ define <vscale x 8 x i1> @fcmps_ord_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5041,9 +5041,9 @@ define <vscale x 8 x i1> @fcmps_ord_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5056,8 +5056,8 @@ define <vscale x 8 x i1> @fcmps_ueq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5068,8 +5068,8 @@ define <vscale x 8 x i1> @fcmps_ueq_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5082,8 +5082,8 @@ define <vscale x 8 x i1> @fcmps_ueq_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5095,8 +5095,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ugt_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5106,8 +5106,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ugt_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5119,8 +5119,8 @@ define <vscale x 8 x i1> @fcmps_ugt_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ugt_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5132,8 +5132,8 @@ define <vscale x 8 x i1> @fcmps_uge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_uge_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5143,8 +5143,8 @@ define <vscale x 8 x i1> @fcmps_uge_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_uge_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5156,8 +5156,8 @@ define <vscale x 8 x i1> @fcmps_uge_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_uge_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5169,8 +5169,8 @@ define <vscale x 8 x i1> @fcmps_ult_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ult_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5180,8 +5180,8 @@ define <vscale x 8 x i1> @fcmps_ult_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ult_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5193,8 +5193,8 @@ define <vscale x 8 x i1> @fcmps_ult_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ult_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5206,8 +5206,8 @@ define <vscale x 8 x i1> @fcmps_ule_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_ule_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5217,8 +5217,8 @@ define <vscale x 8 x i1> @fcmps_ule_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ule_vf_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5230,8 +5230,8 @@ define <vscale x 8 x i1> @fcmps_ule_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK-LABEL: fcmps_ule_fv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5244,8 +5244,8 @@ define <vscale x 8 x i1> @fcmps_une_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5256,8 +5256,8 @@ define <vscale x 8 x i1> @fcmps_une_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5270,8 +5270,8 @@ define <vscale x 8 x i1> @fcmps_une_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5283,10 +5283,10 @@ define <vscale x 8 x i1> @fcmps_uno_vv_nxv8f32(<vscale x 8 x float> %va, <vscale
 ; CHECK-LABEL: fcmps_uno_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v12, v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -5297,10 +5297,10 @@ define <vscale x 8 x i1> @fcmps_uno_vf_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v12, fa0
-; CHECK-NEXT:    vmnot.m v9, v16
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v12, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5313,10 +5313,10 @@ define <vscale x 8 x i1> @fcmps_uno_fv_nxv8f32(<vscale x 8 x float> %va, float %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmnot.m v12, v16
-; CHECK-NEXT:    vmfle.vv v13, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v12, v13
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmnot.m v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -5330,8 +5330,8 @@ define <vscale x 16 x i1> @fcmps_oeq_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmfle.vv v25, v8, v16
-; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmand.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5342,8 +5342,8 @@ define <vscale x 16 x i1> @fcmps_oeq_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmfle.vf v17, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5356,8 +5356,8 @@ define <vscale x 16 x i1> @fcmps_oeq_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmfge.vf v17, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5506,8 +5506,8 @@ define <vscale x 16 x i1> @fcmps_one_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5518,8 +5518,8 @@ define <vscale x 16 x i1> @fcmps_one_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5532,8 +5532,8 @@ define <vscale x 16 x i1> @fcmps_one_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5545,9 +5545,9 @@ define <vscale x 16 x i1> @fcmps_ord_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_ord_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfle.vv v16, v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5558,9 +5558,9 @@ define <vscale x 16 x i1> @fcmps_ord_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfle.vf v16, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5573,9 +5573,9 @@ define <vscale x 16 x i1> @fcmps_ord_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfle.vf v16, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5588,8 +5588,8 @@ define <vscale x 16 x i1> @fcmps_ueq_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmnor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5600,8 +5600,8 @@ define <vscale x 16 x i1> @fcmps_ueq_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5614,8 +5614,8 @@ define <vscale x 16 x i1> @fcmps_ueq_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5627,8 +5627,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_ugt_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5638,8 +5638,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ugt_vf_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5651,8 +5651,8 @@ define <vscale x 16 x i1> @fcmps_ugt_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ugt_fv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5664,8 +5664,8 @@ define <vscale x 16 x i1> @fcmps_uge_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_uge_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v8, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5675,8 +5675,8 @@ define <vscale x 16 x i1> @fcmps_uge_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_uge_vf_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5688,8 +5688,8 @@ define <vscale x 16 x i1> @fcmps_uge_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_uge_fv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5701,8 +5701,8 @@ define <vscale x 16 x i1> @fcmps_ult_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_ult_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v8, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5712,8 +5712,8 @@ define <vscale x 16 x i1> @fcmps_ult_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ult_vf_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5725,8 +5725,8 @@ define <vscale x 16 x i1> @fcmps_ult_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ult_fv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5738,8 +5738,8 @@ define <vscale x 16 x i1> @fcmps_ule_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_ule_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5749,8 +5749,8 @@ define <vscale x 16 x i1> @fcmps_ule_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ule_vf_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5762,8 +5762,8 @@ define <vscale x 16 x i1> @fcmps_ule_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK-LABEL: fcmps_ule_fv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5776,8 +5776,8 @@ define <vscale x 16 x i1> @fcmps_une_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmfle.vv v25, v8, v16
-; CHECK-NEXT:    vmnand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmnand.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5788,8 +5788,8 @@ define <vscale x 16 x i1> @fcmps_une_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmfle.vf v17, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5802,8 +5802,8 @@ define <vscale x 16 x i1> @fcmps_une_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmfge.vf v17, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5815,10 +5815,10 @@ define <vscale x 16 x i1> @fcmps_uno_vv_nxv16f32(<vscale x 16 x float> %va, <vsc
 ; CHECK-LABEL: fcmps_uno_vv_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v16
-; CHECK-NEXT:    vmorn.mm v0, v8, v24
+; CHECK-NEXT:    vmfle.vv v16, v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 16 x i1> %1
@@ -5829,10 +5829,10 @@ define <vscale x 16 x i1> @fcmps_uno_vf_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vv v24, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v16, fa0
-; CHECK-NEXT:    vmnot.m v9, v24
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v16, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -5845,10 +5845,10 @@ define <vscale x 16 x i1> @fcmps_uno_fv_nxv16f32(<vscale x 16 x float> %va, floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmnot.m v16, v24
-; CHECK-NEXT:    vmfle.vv v17, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v16, v17
+; CHECK-NEXT:    vmfle.vf v16, v16, fa0
+; CHECK-NEXT:    vmnot.m v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
@@ -6394,8 +6394,8 @@ define <vscale x 2 x i1> @fcmps_oeq_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6406,8 +6406,8 @@ define <vscale x 2 x i1> @fcmps_oeq_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6420,8 +6420,8 @@ define <vscale x 2 x i1> @fcmps_oeq_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6570,8 +6570,8 @@ define <vscale x 2 x i1> @fcmps_one_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6582,8 +6582,8 @@ define <vscale x 2 x i1> @fcmps_one_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6596,8 +6596,8 @@ define <vscale x 2 x i1> @fcmps_one_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6609,9 +6609,9 @@ define <vscale x 2 x i1> @fcmps_ord_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ord_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vv v10, v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6622,9 +6622,9 @@ define <vscale x 2 x i1> @fcmps_ord_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v10, v12
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6637,9 +6637,9 @@ define <vscale x 2 x i1> @fcmps_ord_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v10
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6652,8 +6652,8 @@ define <vscale x 2 x i1> @fcmps_ueq_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmflt.vv v13, v10, v8
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6664,8 +6664,8 @@ define <vscale x 2 x i1> @fcmps_ueq_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6678,8 +6678,8 @@ define <vscale x 2 x i1> @fcmps_ueq_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmflt.vf v11, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v11, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6691,8 +6691,8 @@ define <vscale x 2 x i1> @fcmps_ugt_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ugt_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6702,8 +6702,8 @@ define <vscale x 2 x i1> @fcmps_ugt_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ugt_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6715,8 +6715,8 @@ define <vscale x 2 x i1> @fcmps_ugt_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ugt_fv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6728,8 +6728,8 @@ define <vscale x 2 x i1> @fcmps_uge_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_uge_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v8, v10
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v8, v8, v10
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6739,8 +6739,8 @@ define <vscale x 2 x i1> @fcmps_uge_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_uge_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6752,8 +6752,8 @@ define <vscale x 2 x i1> @fcmps_uge_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_uge_fv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6765,8 +6765,8 @@ define <vscale x 2 x i1> @fcmps_ult_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ult_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vv v8, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6776,8 +6776,8 @@ define <vscale x 2 x i1> @fcmps_ult_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ult_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6789,8 +6789,8 @@ define <vscale x 2 x i1> @fcmps_ult_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ult_fv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6802,8 +6802,8 @@ define <vscale x 2 x i1> @fcmps_ule_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ule_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vv v12, v10, v8
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vv v8, v10, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6813,8 +6813,8 @@ define <vscale x 2 x i1> @fcmps_ule_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ule_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfgt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6826,8 +6826,8 @@ define <vscale x 2 x i1> @fcmps_ule_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK-LABEL: fcmps_ule_fv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmflt.vf v10, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6840,8 +6840,8 @@ define <vscale x 2 x i1> @fcmps_une_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vv v12, v10, v8
-; CHECK-NEXT:    vmfle.vv v13, v8, v10
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v10
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6852,8 +6852,8 @@ define <vscale x 2 x i1> @fcmps_une_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfge.vf v10, v8, fa0
-; CHECK-NEXT:    vmfle.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6866,8 +6866,8 @@ define <vscale x 2 x i1> @fcmps_une_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmfle.vf v10, v8, fa0
-; CHECK-NEXT:    vmfge.vf v11, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v11, v10
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6879,10 +6879,10 @@ define <vscale x 2 x i1> @fcmps_uno_vv_nxv2f64(<vscale x 2 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_uno_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmfle.vv v12, v10, v10
-; CHECK-NEXT:    vmfle.vv v10, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v10
-; CHECK-NEXT:    vmorn.mm v0, v8, v12
+; CHECK-NEXT:    vmfle.vv v10, v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 2 x i1> %1
@@ -6893,10 +6893,10 @@ define <vscale x 2 x i1> @fcmps_uno_vf_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v10, fa0
-; CHECK-NEXT:    vmnot.m v9, v12
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v10, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6909,10 +6909,10 @@ define <vscale x 2 x i1> @fcmps_uno_fv_nxv2f64(<vscale x 2 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v10, fa0
-; CHECK-NEXT:    vmfle.vf v12, v10, fa0
-; CHECK-NEXT:    vmnot.m v10, v12
-; CHECK-NEXT:    vmfle.vv v11, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v10, v11
+; CHECK-NEXT:    vmfle.vf v10, v10, fa0
+; CHECK-NEXT:    vmnot.m v10, v10
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
@@ -6926,8 +6926,8 @@ define <vscale x 4 x i1> @fcmps_oeq_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -6938,8 +6938,8 @@ define <vscale x 4 x i1> @fcmps_oeq_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -6952,8 +6952,8 @@ define <vscale x 4 x i1> @fcmps_oeq_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7102,8 +7102,8 @@ define <vscale x 4 x i1> @fcmps_one_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7114,8 +7114,8 @@ define <vscale x 4 x i1> @fcmps_one_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7128,8 +7128,8 @@ define <vscale x 4 x i1> @fcmps_one_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7141,9 +7141,9 @@ define <vscale x 4 x i1> @fcmps_ord_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ord_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vv v12, v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7154,9 +7154,9 @@ define <vscale x 4 x i1> @fcmps_ord_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7169,9 +7169,9 @@ define <vscale x 4 x i1> @fcmps_ord_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v12
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7184,8 +7184,8 @@ define <vscale x 4 x i1> @fcmps_ueq_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmflt.vv v17, v12, v8
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7196,8 +7196,8 @@ define <vscale x 4 x i1> @fcmps_ueq_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7210,8 +7210,8 @@ define <vscale x 4 x i1> @fcmps_ueq_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmflt.vf v13, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v13, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7223,8 +7223,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ugt_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7234,8 +7234,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ugt_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7247,8 +7247,8 @@ define <vscale x 4 x i1> @fcmps_ugt_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ugt_fv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7260,8 +7260,8 @@ define <vscale x 4 x i1> @fcmps_uge_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_uge_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v8, v12
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v8, v12
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7271,8 +7271,8 @@ define <vscale x 4 x i1> @fcmps_uge_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_uge_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7284,8 +7284,8 @@ define <vscale x 4 x i1> @fcmps_uge_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_uge_fv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7297,8 +7297,8 @@ define <vscale x 4 x i1> @fcmps_ult_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ult_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7308,8 +7308,8 @@ define <vscale x 4 x i1> @fcmps_ult_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ult_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7321,8 +7321,8 @@ define <vscale x 4 x i1> @fcmps_ult_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ult_fv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7334,8 +7334,8 @@ define <vscale x 4 x i1> @fcmps_ule_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ule_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v16, v12, v8
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vv v8, v12, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7345,8 +7345,8 @@ define <vscale x 4 x i1> @fcmps_ule_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ule_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfgt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7358,8 +7358,8 @@ define <vscale x 4 x i1> @fcmps_ule_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK-LABEL: fcmps_ule_fv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmflt.vf v12, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7372,8 +7372,8 @@ define <vscale x 4 x i1> @fcmps_une_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vv v16, v12, v8
-; CHECK-NEXT:    vmfle.vv v17, v8, v12
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v12
+; CHECK-NEXT:    vmnand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7384,8 +7384,8 @@ define <vscale x 4 x i1> @fcmps_une_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfge.vf v12, v8, fa0
-; CHECK-NEXT:    vmfle.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7398,8 +7398,8 @@ define <vscale x 4 x i1> @fcmps_une_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vmfle.vf v12, v8, fa0
-; CHECK-NEXT:    vmfge.vf v13, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v13, v12
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7411,10 +7411,10 @@ define <vscale x 4 x i1> @fcmps_uno_vv_nxv4f64(<vscale x 4 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_uno_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v16, v12, v12
-; CHECK-NEXT:    vmfle.vv v12, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v12
-; CHECK-NEXT:    vmorn.mm v0, v8, v16
+; CHECK-NEXT:    vmfle.vv v12, v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v12
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 4 x i1> %1
@@ -7425,10 +7425,10 @@ define <vscale x 4 x i1> @fcmps_uno_vf_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v12, fa0
-; CHECK-NEXT:    vmnot.m v9, v16
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v12, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7441,10 +7441,10 @@ define <vscale x 4 x i1> @fcmps_uno_fv_nxv4f64(<vscale x 4 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v12, fa0
-; CHECK-NEXT:    vmfle.vf v16, v12, fa0
-; CHECK-NEXT:    vmnot.m v12, v16
-; CHECK-NEXT:    vmfle.vv v13, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v12, v13
+; CHECK-NEXT:    vmfle.vf v12, v12, fa0
+; CHECK-NEXT:    vmnot.m v12, v12
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v12, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -7458,8 +7458,8 @@ define <vscale x 8 x i1> @fcmps_oeq_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmfle.vv v25, v8, v16
-; CHECK-NEXT:    vmand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmand.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7470,8 +7470,8 @@ define <vscale x 8 x i1> @fcmps_oeq_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmfle.vf v17, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7484,8 +7484,8 @@ define <vscale x 8 x i1> @fcmps_oeq_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmfge.vf v17, v8, fa0
-; CHECK-NEXT:    vmand.mm v0, v17, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7634,8 +7634,8 @@ define <vscale x 8 x i1> @fcmps_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7646,8 +7646,8 @@ define <vscale x 8 x i1> @fcmps_one_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7660,8 +7660,8 @@ define <vscale x 8 x i1> @fcmps_one_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7673,9 +7673,9 @@ define <vscale x 8 x i1> @fcmps_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ord_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfle.vv v16, v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7686,9 +7686,9 @@ define <vscale x 8 x i1> @fcmps_ord_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v16, v24
+; CHECK-NEXT:    vmfle.vf v16, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7701,9 +7701,9 @@ define <vscale x 8 x i1> @fcmps_ord_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmand.mm v0, v24, v16
+; CHECK-NEXT:    vmfle.vf v16, v16, fa0
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmand.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7716,8 +7716,8 @@ define <vscale x 8 x i1> @fcmps_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmflt.vv v25, v16, v8
-; CHECK-NEXT:    vmnor.mm v0, v25, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnor.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7728,8 +7728,8 @@ define <vscale x 8 x i1> @fcmps_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmfgt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7742,8 +7742,8 @@ define <vscale x 8 x i1> @fcmps_ueq_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmflt.vf v17, v8, fa0
-; CHECK-NEXT:    vmnor.mm v0, v17, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnor.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7755,8 +7755,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ugt_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7766,8 +7766,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ugt_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7779,8 +7779,8 @@ define <vscale x 8 x i1> @fcmps_ugt_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ugt_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7792,8 +7792,8 @@ define <vscale x 8 x i1> @fcmps_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_uge_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v8, v16
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v8, v8, v16
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7803,8 +7803,8 @@ define <vscale x 8 x i1> @fcmps_uge_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_uge_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7816,8 +7816,8 @@ define <vscale x 8 x i1> @fcmps_uge_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_uge_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7829,8 +7829,8 @@ define <vscale x 8 x i1> @fcmps_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ult_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmfle.vv v8, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7840,8 +7840,8 @@ define <vscale x 8 x i1> @fcmps_ult_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ult_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7853,8 +7853,8 @@ define <vscale x 8 x i1> @fcmps_ult_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ult_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7866,8 +7866,8 @@ define <vscale x 8 x i1> @fcmps_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_ule_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vv v24, v16, v8
-; CHECK-NEXT:    vmnot.m v0, v24
+; CHECK-NEXT:    vmflt.vv v8, v16, v8
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7877,8 +7877,8 @@ define <vscale x 8 x i1> @fcmps_ule_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ule_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfgt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmfgt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7890,8 +7890,8 @@ define <vscale x 8 x i1> @fcmps_ule_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK-LABEL: fcmps_ule_fv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmflt.vf v16, v8, fa0
-; CHECK-NEXT:    vmnot.m v0, v16
+; CHECK-NEXT:    vmflt.vf v8, v8, fa0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7904,8 +7904,8 @@ define <vscale x 8 x i1> @fcmps_une_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfle.vv v24, v16, v8
-; CHECK-NEXT:    vmfle.vv v25, v8, v16
-; CHECK-NEXT:    vmnand.mm v0, v25, v24
+; CHECK-NEXT:    vmfle.vv v8, v8, v16
+; CHECK-NEXT:    vmnand.mm v0, v8, v24
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7916,8 +7916,8 @@ define <vscale x 8 x i1> @fcmps_une_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfge.vf v16, v8, fa0
-; CHECK-NEXT:    vmfle.vf v17, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfle.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7930,8 +7930,8 @@ define <vscale x 8 x i1> @fcmps_une_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmfle.vf v16, v8, fa0
-; CHECK-NEXT:    vmfge.vf v17, v8, fa0
-; CHECK-NEXT:    vmnand.mm v0, v17, v16
+; CHECK-NEXT:    vmfge.vf v8, v8, fa0
+; CHECK-NEXT:    vmnand.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7943,10 +7943,10 @@ define <vscale x 8 x i1> @fcmps_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscal
 ; CHECK-LABEL: fcmps_uno_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vmfle.vv v24, v16, v16
-; CHECK-NEXT:    vmfle.vv v16, v8, v8
-; CHECK-NEXT:    vmnot.m v8, v16
-; CHECK-NEXT:    vmorn.mm v0, v8, v24
+; CHECK-NEXT:    vmfle.vv v16, v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v16
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp
   ret <vscale x 8 x i1> %1
@@ -7957,10 +7957,10 @@ define <vscale x 8 x i1> @fcmps_uno_vf_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vv v24, v8, v8
-; CHECK-NEXT:    vmfle.vf v8, v16, fa0
-; CHECK-NEXT:    vmnot.m v9, v24
-; CHECK-NEXT:    vmorn.mm v0, v9, v8
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmfle.vf v9, v16, fa0
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -7973,10 +7973,10 @@ define <vscale x 8 x i1> @fcmps_uno_fv_nxv8f64(<vscale x 8 x double> %va, double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v16, fa0
-; CHECK-NEXT:    vmfle.vf v24, v16, fa0
-; CHECK-NEXT:    vmnot.m v16, v24
-; CHECK-NEXT:    vmfle.vv v17, v8, v8
-; CHECK-NEXT:    vmorn.mm v0, v16, v17
+; CHECK-NEXT:    vmfle.vf v16, v16, fa0
+; CHECK-NEXT:    vmnot.m v16, v16
+; CHECK-NEXT:    vmfle.vv v8, v8, v8
+; CHECK-NEXT:    vmorn.mm v0, v16, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
index 83f588ce5027d..5d1b0f3295bf2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
@@ -194,16 +194,16 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1 x
 ; ZVFH-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFH-NEXT:    vfsgnj.vv v8, v8, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v9, v9
+; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vand.vx v9, v10, a0
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, -1
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
@@ -219,8 +219,8 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1 x
 ; ZVFH-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v9, fa0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFH-NEXT:    vfsgnj.vv v8, v8, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v9, v9
+; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f32:
@@ -229,10 +229,10 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1 x
 ; ZVFHMIN-NEXT:    vfmv.v.f v9, fa0
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
 ; ZVFHMIN-NEXT:    addi a1, a0, -1
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vand.vx v9, v10, a0
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x float> poison, float %s, i32 0
@@ -246,17 +246,17 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1
 ; ZVFH-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v9, v9
+; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
 ; ZVFHMIN-NEXT:    addi a1, a0, -1
-; ZVFHMIN-NEXT:    vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
@@ -273,8 +273,8 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1
 ; ZVFH-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v9, fa0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v9, v9
+; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32:
@@ -284,9 +284,9 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    addi a1, a0, -1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -302,19 +302,19 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1 x
 ; ZVFH-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f64:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.rod.f.f.w v10, v9
+; ZVFH-NEXT:    vfncvt.rod.f.f.w v9, v9
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v9, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v9, v9
 ; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f64:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v10, v9
+; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v9, v9
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
 ; ZVFHMIN-NEXT:    addi a1, a0, -1
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
@@ -331,9 +331,9 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1 x
 ; ZVFH-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v9, fa0
 ; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.rod.f.f.w v10, v9
+; ZVFH-NEXT:    vfncvt.rod.f.f.w v9, v9
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v9, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v9, v9
 ; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
 ; ZVFH-NEXT:    ret
 ;
@@ -343,10 +343,10 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1 x
 ; ZVFHMIN-NEXT:    vfmv.v.f v9, fa0
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v10, v9
+; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v9, v9
 ; ZVFHMIN-NEXT:    addi a1, a0, -1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
@@ -362,9 +362,9 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1
 ; ZVFH-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.rod.f.f.w v10, v9
+; ZVFH-NEXT:    vfncvt.rod.f.f.w v9, v9
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v9, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v9, v9
 ; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
 ; ZVFH-NEXT:    ret
 ;
@@ -372,10 +372,10 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v10, v9
+; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v9, v9
 ; ZVFHMIN-NEXT:    addi a1, a0, -1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
@@ -393,9 +393,9 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1
 ; ZVFH-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; ZVFH-NEXT:    vfmv.v.f v9, fa0
 ; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.rod.f.f.w v10, v9
+; ZVFH-NEXT:    vfncvt.rod.f.f.w v9, v9
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v9, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v9, v9
 ; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
 ; ZVFH-NEXT:    ret
 ;
@@ -406,10 +406,10 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    addi a1, a0, -1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v10, v9
+; ZVFHMIN-NEXT:    vfncvt.rod.f.f.w v9, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v9
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vand.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
@@ -1232,8 +1232,8 @@ define <vscale x 1 x float> @vfcopysign_exttrunc_vv_nxv1f32_nxv1f64(<vscale x 1
 ; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v9, v9
+; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %e = fptrunc <vscale x 1 x double> %vs to <vscale x 1 x float>
   %r = call <vscale x 1 x float> @llvm.copysign.nxv1f32(<vscale x 1 x float> %vm, <vscale x 1 x float> %e)
@@ -1246,8 +1246,8 @@ define <vscale x 1 x float> @vfcopysign_exttrunc_vf_nxv1f32_nxv1f64(<vscale x 1
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v9
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v9, v9
+; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x double> poison, double %s, i32 0
   %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
@@ -1260,8 +1260,8 @@ define <vscale x 1 x float> @vfcopynsign_exttrunc_vv_nxv1f32_nxv1f64(<vscale x 1
 ; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v9
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v9, v9
+; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %n = fneg <vscale x 1 x double> %vs
   %eneg = fptrunc <vscale x 1 x double> %n to <vscale x 1 x float>
@@ -1275,8 +1275,8 @@ define <vscale x 1 x float> @vfcopynsign_exttrunc_vf_nxv1f32_nxv1f64(<vscale x 1
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v9
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v9, v9
+; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x double> poison, double %s, i32 0
   %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
@@ -1904,9 +1904,9 @@ define <vscale x 8 x double> @vfcopysign_exttrunc_vv_nxv8f64_nxv8f16(<vscale x 8
 ; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv8f64_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v24, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v20, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v24
+; CHECK-NEXT:    vfwcvt.f.f.v v16, v20
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfsgnj.vv v8, v8, v16
 ; CHECK-NEXT:    ret
@@ -1933,9 +1933,9 @@ define <vscale x 8 x double> @vfcopynsign_exttrunc_vv_nxv8f64_nxv8f16(<vscale x
 ; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv8f64_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v24, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v20, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v24
+; CHECK-NEXT:    vfwcvt.f.f.v v16, v20
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfsgnjn.vv v8, v8, v16
 ; CHECK-NEXT:    ret
@@ -2020,9 +2020,9 @@ define <vscale x 2 x float> @fptrunc_of_copysign_nxv2f32_nxv2f64(<vscale x 2 x d
 ; CHECK-LABEL: fptrunc_of_copysign_nxv2f32_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vfsgnj.vv v10, v8, v10
+; CHECK-NEXT:    vfsgnj.vv v8, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %copy = call fast <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> %X, <vscale x 2 x double> %Y)
   %trunc = fptrunc <vscale x 2 x double> %copy to <vscale x 2 x float>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll
index 818b882a402ac..8aa0fd04e3f81 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll
@@ -19,9 +19,9 @@ define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v9, v10
+; CHECK-NEXT:    vfdiv.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -37,9 +37,9 @@ define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v10, v8
+; CHECK-NEXT:    vfdiv.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -54,9 +54,9 @@ define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v9, v10
+; CHECK-NEXT:    vfdiv.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -72,9 +72,9 @@ define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v10, v8
+; CHECK-NEXT:    vfdiv.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -89,9 +89,9 @@ define <vscale x 4 x bfloat> @vfdiv_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v10, v12, v10
+; CHECK-NEXT:    vfdiv.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -103,13 +103,13 @@ define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v10, v10, v8
+; CHECK-NEXT:    vfdiv.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -124,9 +124,9 @@ define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfdiv.vv v12, v16, v12
+; CHECK-NEXT:    vfdiv.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -138,13 +138,13 @@ define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfdiv.vv v12, v12, v8
+; CHECK-NEXT:    vfdiv.vv v8, v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -157,13 +157,13 @@ define <vscale x 8 x bfloat> @vfdiv_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfdiv.vv v12, v8, v12
+; CHECK-NEXT:    vfdiv.vv v8, v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -178,9 +178,9 @@ define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v16, v24, v16
+; CHECK-NEXT:    vfdiv.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -192,13 +192,13 @@ define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v24, a0
+; CHECK-NEXT:    vmv.v.x v12, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v16, v16, v8
+; CHECK-NEXT:    vfdiv.vv v8, v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -209,34 +209,19 @@ define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) strictfp {
 ; CHECK-LABEL: vfdiv_vv_nxv32bf16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v20
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v16, v0, v16
+; CHECK-NEXT:    vfdiv.vv v24, v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v24, v0, v24
+; CHECK-NEXT:    vfdiv.vv v16, v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -246,49 +231,23 @@ entry:
 define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) strictfp {
 ; CHECK-LABEL: vfdiv_vf_nxv32bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vmv.v.x v24, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v16
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v24
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v28
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v24, v16, v0
+; CHECK-NEXT:    vfdiv.vv v16, v16, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v16, v0, v8
+; CHECK-NEXT:    vfdiv.vv v24, v8, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
@@ -310,9 +269,9 @@ define <vscale x 1 x half> @vfdiv_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 1 x half> @llvm.experimental.constrained.fdiv.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -334,9 +293,9 @@ define <vscale x 1 x half> @vfdiv_vf_nxv1f16(<vscale x 1 x half> %va, half %b) s
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -358,9 +317,9 @@ define <vscale x 2 x half> @vfdiv_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 2 x half> @llvm.experimental.constrained.fdiv.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -382,9 +341,9 @@ define <vscale x 2 x half> @vfdiv_vf_nxv2f16(<vscale x 2 x half> %va, half %b) s
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -406,9 +365,9 @@ define <vscale x 4 x half> @vfdiv_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 4 x half> @llvm.experimental.constrained.fdiv.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -426,13 +385,13 @@ define <vscale x 4 x half> @vfdiv_vf_nxv4f16(<vscale x 4 x half> %va, half %b) s
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -454,9 +413,9 @@ define <vscale x 8 x half> @vfdiv_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 8 x half> @llvm.experimental.constrained.fdiv.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -474,13 +433,13 @@ define <vscale x 8 x half> @vfdiv_vf_nxv8f16(<vscale x 8 x half> %va, half %b) s
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vmv.v.x v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -499,13 +458,13 @@ define <vscale x 8 x half> @vfdiv_fv_nxv8f16(<vscale x 8 x half> %va, half %b) s
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vmv.v.x v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v8, v12
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -527,9 +486,9 @@ define <vscale x 16 x half> @vfdiv_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 16 x half> @llvm.experimental.constrained.fdiv.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -547,13 +506,13 @@ define <vscale x 16 x half> @vfdiv_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a0
+; ZVFHMIN-NEXT:    vmv.v.x v12, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -571,34 +530,19 @@ define <vscale x 32 x half> @vfdiv_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfdiv_vv_nxv32f16:
 ; ZVFHMIN:       # %bb.0: # %entry
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v16, v0, v16
+; ZVFHMIN-NEXT:    vfdiv.vv v24, v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v24, v0, v24
+; ZVFHMIN-NEXT:    vfdiv.vv v16, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT:    addi sp, sp, 16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 32 x half> @llvm.experimental.constrained.fdiv.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -614,49 +558,23 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ;
 ; ZVFHMIN-LABEL: vfdiv_vf_nxv32f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    addi a1, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vmv.v.x v24, a0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v24, v16, v0
+; ZVFHMIN-NEXT:    vfdiv.vv v16, v16, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v16, v0, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v24, v8, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT:    addi sp, sp, 16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
index 60b49874ceaea..6edba309190b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
@@ -15,9 +15,9 @@ define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v9, v10
+; CHECK-NEXT:    vfdiv.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fdiv <vscale x 1 x bfloat> %va, %vb
   ret <vscale x 1 x bfloat> %vc
@@ -30,9 +30,9 @@ define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfdiv.vf v9, v9, fa5
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -47,9 +47,9 @@ define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v9, v10
+; CHECK-NEXT:    vfdiv.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fdiv <vscale x 2 x bfloat> %va, %vb
   ret <vscale x 2 x bfloat> %vc
@@ -62,9 +62,9 @@ define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfdiv.vf v9, v9, fa5
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -79,9 +79,9 @@ define <vscale x 4 x bfloat> @vfdiv_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v10, v12, v10
+; CHECK-NEXT:    vfdiv.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fdiv <vscale x 4 x bfloat> %va, %vb
   ret <vscale x 4 x bfloat> %vc
@@ -94,9 +94,9 @@ define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfdiv.vf v10, v10, fa5
+; CHECK-NEXT:    vfdiv.vf v8, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -111,9 +111,9 @@ define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfdiv.vv v12, v16, v12
+; CHECK-NEXT:    vfdiv.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fdiv <vscale x 8 x bfloat> %va, %vb
   ret <vscale x 8 x bfloat> %vc
@@ -126,9 +126,9 @@ define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfdiv.vf v12, v12, fa5
+; CHECK-NEXT:    vfdiv.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -143,9 +143,9 @@ define <vscale x 8 x bfloat> @vfdiv_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfrdiv.vf v12, v12, fa5
+; CHECK-NEXT:    vfrdiv.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -160,9 +160,9 @@ define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v16, v24, v16
+; CHECK-NEXT:    vfdiv.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fdiv <vscale x 16 x bfloat> %va, %vb
   ret <vscale x 16 x bfloat> %vc
@@ -175,9 +175,9 @@ define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vf v16, v16, fa5
+; CHECK-NEXT:    vfdiv.vf v8, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -188,34 +188,19 @@ define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) {
 ; CHECK-LABEL: vfdiv_vv_nxv32bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v20
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v16, v0, v16
+; CHECK-NEXT:    vfdiv.vv v24, v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v24, v0, v24
+; CHECK-NEXT:    vfdiv.vv v16, v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
 ; CHECK-NEXT:    ret
   %vc = fdiv <vscale x 32 x bfloat> %va, %vb
   ret <vscale x 32 x bfloat> %vc
@@ -227,17 +212,17 @@ define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vmv.v.x v24, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfdiv.vv v16, v16, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v28
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v24, v24, v0
+; CHECK-NEXT:    vfdiv.vv v24, v8, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
@@ -261,9 +246,9 @@ define <vscale x 1 x half> @vfdiv_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fdiv <vscale x 1 x half> %va, %vb
   ret <vscale x 1 x half> %vc
@@ -282,9 +267,9 @@ define <vscale x 1 x half> @vfdiv_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfdiv.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -305,9 +290,9 @@ define <vscale x 2 x half> @vfdiv_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fdiv <vscale x 2 x half> %va, %vb
   ret <vscale x 2 x half> %vc
@@ -326,9 +311,9 @@ define <vscale x 2 x half> @vfdiv_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfdiv.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -349,9 +334,9 @@ define <vscale x 4 x half> @vfdiv_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fdiv <vscale x 4 x half> %va, %vb
   ret <vscale x 4 x half> %vc
@@ -370,9 +355,9 @@ define <vscale x 4 x half> @vfdiv_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vf v10, v10, fa5
+; ZVFHMIN-NEXT:    vfdiv.vf v8, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -393,9 +378,9 @@ define <vscale x 8 x half> @vfdiv_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fdiv <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x half> %vc
@@ -414,9 +399,9 @@ define <vscale x 8 x half> @vfdiv_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfdiv.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -437,9 +422,9 @@ define <vscale x 8 x half> @vfdiv_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfrdiv.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfrdiv.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -460,9 +445,9 @@ define <vscale x 16 x half> @vfdiv_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fdiv <vscale x 16 x half> %va, %vb
   ret <vscale x 16 x half> %vc
@@ -481,9 +466,9 @@ define <vscale x 16 x half> @vfdiv_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vf v16, v16, fa5
+; ZVFHMIN-NEXT:    vfdiv.vf v8, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -500,34 +485,19 @@ define <vscale x 32 x half> @vfdiv_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ;
 ; ZVFHMIN-LABEL: vfdiv_vv_nxv32f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v16, v0, v16
+; ZVFHMIN-NEXT:    vfdiv.vv v24, v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v24, v0, v24
+; ZVFHMIN-NEXT:    vfdiv.vv v16, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT:    addi sp, sp, 16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    ret
   %vc = fdiv <vscale x 32 x half> %va, %vb
   ret <vscale x 32 x half> %vc
@@ -545,17 +515,17 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
+; ZVFHMIN-NEXT:    vmv.v.x v24, a0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v16, v16, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v24, v24, v0
+; ZVFHMIN-NEXT:    vfdiv.vv v24, v8, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
index 736d575a1a4e3..4b7dfd7663143 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
@@ -21,9 +21,9 @@ define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vfdiv.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -36,9 +36,9 @@ define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v9, v10
+; CHECK-NEXT:    vfdiv.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -53,9 +53,9 @@ define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v10, v8, v0.t
+; CHECK-NEXT:    vfdiv.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -72,9 +72,9 @@ define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v10, v8
+; CHECK-NEXT:    vfdiv.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -91,9 +91,9 @@ define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vfdiv.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -106,9 +106,9 @@ define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v9, v10
+; CHECK-NEXT:    vfdiv.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -123,9 +123,9 @@ define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v10, v8, v0.t
+; CHECK-NEXT:    vfdiv.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -142,9 +142,9 @@ define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v10, v8
+; CHECK-NEXT:    vfdiv.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -161,9 +161,9 @@ define <vscale x 4 x bfloat> @vfdiv_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v10, v12, v10, v0.t
+; CHECK-NEXT:    vfdiv.vv v8, v12, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -176,9 +176,9 @@ define <vscale x 4 x bfloat> @vfdiv_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v10, v12, v10
+; CHECK-NEXT:    vfdiv.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -189,13 +189,13 @@ define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a1
+; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v10, v10, v8, v0.t
+; CHECK-NEXT:    vfdiv.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -208,13 +208,13 @@ define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a1
+; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v10, v10, v8
+; CHECK-NEXT:    vfdiv.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -231,9 +231,9 @@ define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfdiv.vv v12, v16, v12, v0.t
+; CHECK-NEXT:    vfdiv.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -246,9 +246,9 @@ define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfdiv.vv v12, v16, v12
+; CHECK-NEXT:    vfdiv.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -259,13 +259,13 @@ define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a1
+; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfdiv.vv v12, v12, v8, v0.t
+; CHECK-NEXT:    vfdiv.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -278,13 +278,13 @@ define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a1
+; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfdiv.vv v12, v12, v8
+; CHECK-NEXT:    vfdiv.vv v8, v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -301,9 +301,9 @@ define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v16, v24, v16, v0.t
+; CHECK-NEXT:    vfdiv.vv v8, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -316,9 +316,9 @@ define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v16, v24, v16
+; CHECK-NEXT:    vfdiv.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -329,13 +329,13 @@ define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v24, a1
+; CHECK-NEXT:    vmv.v.x v12, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    vfdiv.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -348,13 +348,13 @@ define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v24, a1
+; CHECK-NEXT:    vmv.v.x v12, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v16, v16, v8
+; CHECK-NEXT:    vfdiv.vv v8, v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -483,7 +483,7 @@ define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    vmv.v.x v24, a1
+; CHECK-NEXT:    vmv.v.x v16, a1
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a0, a1
@@ -496,9 +496,9 @@ define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    add a3, sp, a3
 ; CHECK-NEXT:    addi a3, a3, 16
-; CHECK-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v20, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfdiv.vv v16, v24, v16, v0.t
@@ -550,20 +550,20 @@ define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmset.m v24
-; CHECK-NEXT:    vmv.v.x v16, a1
+; CHECK-NEXT:    vmset.m v16
+; CHECK-NEXT:    vmv.v.x v24, a1
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v0, v24, a2
+; CHECK-NEXT:    vslidedown.vx v0, v16, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
 ; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v28, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfdiv.vv v16, v16, v24, v0.t
@@ -609,9 +609,9 @@ define <vscale x 1 x half> @vfdiv_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x half> %v
@@ -630,9 +630,9 @@ define <vscale x 1 x half> @vfdiv_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x half> %v
@@ -653,9 +653,9 @@ define <vscale x 1 x half> @vfdiv_vf_nxv1f16(<vscale x 1 x half> %va, half %b, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -678,9 +678,9 @@ define <vscale x 1 x half> @vfdiv_vf_nxv1f16_unmasked(<vscale x 1 x half> %va, h
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -703,9 +703,9 @@ define <vscale x 2 x half> @vfdiv_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -724,9 +724,9 @@ define <vscale x 2 x half> @vfdiv_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -747,9 +747,9 @@ define <vscale x 2 x half> @vfdiv_vf_nxv2f16(<vscale x 2 x half> %va, half %b, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -772,9 +772,9 @@ define <vscale x 2 x half> @vfdiv_vf_nxv2f16_unmasked(<vscale x 2 x half> %va, h
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -797,9 +797,9 @@ define <vscale x 4 x half> @vfdiv_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x half> %v
@@ -818,9 +818,9 @@ define <vscale x 4 x half> @vfdiv_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x half> %v
@@ -837,13 +837,13 @@ define <vscale x 4 x half> @vfdiv_vf_nxv4f16(<vscale x 4 x half> %va, half %b, <
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v10, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -862,13 +862,13 @@ define <vscale x 4 x half> @vfdiv_vf_nxv4f16_unmasked(<vscale x 4 x half> %va, h
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -891,9 +891,9 @@ define <vscale x 8 x half> @vfdiv_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -912,9 +912,9 @@ define <vscale x 8 x half> @vfdiv_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x half> %v
@@ -931,13 +931,13 @@ define <vscale x 8 x half> @vfdiv_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v12, v8, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -956,13 +956,13 @@ define <vscale x 8 x half> @vfdiv_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, h
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -985,9 +985,9 @@ define <vscale x 16 x half> @vfdiv_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -1006,9 +1006,9 @@ define <vscale x 16 x half> @vfdiv_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x half> %v
@@ -1025,13 +1025,13 @@ define <vscale x 16 x half> @vfdiv_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v16, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1050,13 +1050,13 @@ define <vscale x 16 x half> @vfdiv_vf_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1203,7 +1203,7 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -1216,9 +1216,9 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    slli a3, a3, 3
 ; ZVFHMIN-NEXT:    add a3, sp, a3
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v16, v24, v16, v0.t
@@ -1276,20 +1276,20 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v24
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmset.m v16
+; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v24, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
 ; ZVFHMIN-NEXT:    addi a3, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v16, v16, v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll
index abfb652f2206e..5e2f7bde90a5a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll
@@ -32,8 +32,8 @@ define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 signext %len, i32 signext %
 ; CHECK-NEXT:    vsetvli a5, a5, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a6)
 ; CHECK-NEXT:    vle8.v v10, (a7)
-; CHECK-NEXT:    vmsne.vv v12, v8, v10
-; CHECK-NEXT:    vfirst.m a7, v12
+; CHECK-NEXT:    vmsne.vv v8, v8, v10
+; CHECK-NEXT:    vfirst.m a7, v8
 ; CHECK-NEXT:    mv a6, a5
 ; CHECK-NEXT:    bltz a7, .LBB0_5
 ; CHECK-NEXT:  # %bb.4: # %mismatch_vec_loop
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
index 0bfa68298f6b5..e7b5ad682c0eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
@@ -20,11 +20,11 @@ define <vscale x 1 x bfloat> @vfma_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v10, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v10, v11, v0.t
+; CHECK-NEXT:    vfmadd.vv v8, v10, v11, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -36,11 +36,11 @@ define <vscale x 1 x bfloat> @vfma_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %v
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v10, v11
+; CHECK-NEXT:    vfmadd.vv v8, v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -54,11 +54,11 @@ define <vscale x 1 x bfloat> @vfma_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v11, v10, v0.t
+; CHECK-NEXT:    vfmadd.vv v8, v11, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -94,11 +94,11 @@ define <vscale x 1 x bfloat> @vfma_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v11, v10
+; CHECK-NEXT:    vfmadd.vv v8, v11, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -114,11 +114,11 @@ define <vscale x 1 x bfloat> @vfma_vf_nxv1bf16_unmasked_commute(<vscale x 1 x bf
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v11, v10
+; CHECK-NEXT:    vfmadd.vv v8, v11, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -134,11 +134,11 @@ define <vscale x 2 x bfloat> @vfma_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscal
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v10, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v10, v11, v0.t
+; CHECK-NEXT:    vfmadd.vv v8, v10, v11, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -150,11 +150,11 @@ define <vscale x 2 x bfloat> @vfma_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %v
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v10, v11
+; CHECK-NEXT:    vfmadd.vv v8, v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -168,11 +168,11 @@ define <vscale x 2 x bfloat> @vfma_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v11, v10, v0.t
+; CHECK-NEXT:    vfmadd.vv v8, v11, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -208,11 +208,11 @@ define <vscale x 2 x bfloat> @vfma_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v11, v10
+; CHECK-NEXT:    vfmadd.vv v8, v11, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -228,11 +228,11 @@ define <vscale x 2 x bfloat> @vfma_vf_nxv2bf16_unmasked_commute(<vscale x 2 x bf
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v11, v10
+; CHECK-NEXT:    vfmadd.vv v8, v11, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -248,11 +248,11 @@ define <vscale x 4 x bfloat> @vfma_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscal
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v14, v10, v12, v0.t
+; CHECK-NEXT:    vfmadd.vv v8, v10, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -264,11 +264,11 @@ define <vscale x 4 x bfloat> @vfma_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %v
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v14, v10, v12
+; CHECK-NEXT:    vfmadd.vv v8, v10, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -282,11 +282,11 @@ define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v14, v12, v10, v0.t
+; CHECK-NEXT:    vfmadd.vv v8, v12, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -300,9 +300,9 @@ define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16_commute(<vscale x 4 x bfloat> %va
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
-; CHECK-NEXT:    vmv.v.x v14, a1
+; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v14, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v12, v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -322,11 +322,11 @@ define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v14, v12, v10
+; CHECK-NEXT:    vfmadd.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -342,11 +342,11 @@ define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16_unmasked_commute(<vscale x 4 x bf
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v14, v12, v10
+; CHECK-NEXT:    vfmadd.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -362,11 +362,11 @@ define <vscale x 8 x bfloat> @vfma_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v10, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmadd.vv v20, v12, v16, v0.t
+; CHECK-NEXT:    vfmadd.vv v8, v12, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v20, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -378,11 +378,11 @@ define <vscale x 8 x bfloat> @vfma_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %v
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmadd.vv v20, v12, v16
+; CHECK-NEXT:    vfmadd.vv v8, v12, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v20
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -396,11 +396,11 @@ define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10, v0.t
 ; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v10, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmadd.vv v20, v16, v12, v0.t
+; CHECK-NEXT:    vfmadd.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v20, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -414,9 +414,9 @@ define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16_commute(<vscale x 8 x bfloat> %va
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10, v0.t
-; CHECK-NEXT:    vmv.v.x v20, a1
+; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v16, v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -436,11 +436,11 @@ define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmadd.vv v20, v16, v12
+; CHECK-NEXT:    vfmadd.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v20
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -456,11 +456,11 @@ define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16_unmasked_commute(<vscale x 8 x bf
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmadd.vv v20, v16, v12
+; CHECK-NEXT:    vfmadd.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v20
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -474,15 +474,13 @@ define <vscale x 16 x bfloat> @vfma_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vs
 ; CHECK-LABEL: vfma_vv_nxv16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v4, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v4, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -494,11 +492,11 @@ define <vscale x 16 x bfloat> @vfma_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat>
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmadd.vv v0, v16, v24
+; CHECK-NEXT:    vfmadd.vv v8, v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -507,18 +505,16 @@ define <vscale x 16 x bfloat> @vfma_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat>
 define <vscale x 16 x bfloat> @vfma_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfma_vf_nxv16bf16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16, v0.t
-; CHECK-NEXT:    vmv.v.x v4, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v4, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
+; CHECK-NEXT:    vmv.v.x v12, a1
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmadd.vv v16, v24, v8, v0.t
+; CHECK-NEXT:    vfmadd.vv v8, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -532,9 +528,9 @@ define <vscale x 16 x bfloat> @vfma_vf_nxv16bf16_commute(<vscale x 16 x bfloat>
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
-; CHECK-NEXT:    vmv.v.x v4, a1
+; CHECK-NEXT:    vmv.v.x v12, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v4, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v24, v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -554,11 +550,11 @@ define <vscale x 16 x bfloat> @vfma_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat>
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vmv.v.x v12, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmadd.vv v0, v24, v16
+; CHECK-NEXT:    vfmadd.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -574,11 +570,11 @@ define <vscale x 16 x bfloat> @vfma_vf_nxv16bf16_unmasked_commute(<vscale x 16 x
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vmv.v.x v12, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmadd.vv v0, v24, v16
+; CHECK-NEXT:    vfmadd.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -623,8 +619,6 @@ define <vscale x 32 x bfloat> @vfma_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vs
 ; CHECK-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 4
 ; CHECK-NEXT:    add a2, sp, a2
@@ -638,10 +632,8 @@ define <vscale x 32 x bfloat> @vfma_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vs
 ; CHECK-NEXT:    add a2, a2, a3
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v28, v0.t
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -721,15 +713,15 @@ define <vscale x 32 x bfloat> @vfma_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
 ; CHECK-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vl8re16.v v16, (a0)
+; CHECK-NEXT:    vl8re16.v v24, (a0)
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmset.m v24
+; CHECK-NEXT:    vmset.m v16
 ; CHECK-NEXT:    slli a0, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a1, a0
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v0, v24, a2
+; CHECK-NEXT:    vslidedown.vx v0, v16, a2
 ; CHECK-NEXT:    sltu a2, a1, a3
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
@@ -737,11 +729,9 @@ define <vscale x 32 x bfloat> @vfma_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    add a3, sp, a3
 ; CHECK-NEXT:    addi a3, a3, 16
-; CHECK-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v28, v0.t
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 4
 ; CHECK-NEXT:    add a2, sp, a2
@@ -755,10 +745,8 @@ define <vscale x 32 x bfloat> @vfma_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    add a2, a2, a3
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v28, v0.t
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -830,16 +818,17 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfl
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a3, a3, a4
 ; CHECK-NEXT:    csrr a4, vlenb
-; CHECK-NEXT:    slli a4, a4, 3
+; CHECK-NEXT:    slli a4, a4, 4
 ; CHECK-NEXT:    add a4, sp, a4
 ; CHECK-NEXT:    addi a4, a4, 16
 ; CHECK-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
-; CHECK-NEXT:    addi a4, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a4) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a4, vlenb
-; CHECK-NEXT:    slli a4, a4, 4
+; CHECK-NEXT:    slli a4, a4, 3
+; CHECK-NEXT:    mv a5, a4
+; CHECK-NEXT:    slli a4, a4, 1
+; CHECK-NEXT:    add a4, a4, a5
 ; CHECK-NEXT:    add a4, sp, a4
 ; CHECK-NEXT:    addi a4, a4, 16
 ; CHECK-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
@@ -848,24 +837,11 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfl
 ; CHECK-NEXT:    vmv.v.x v8, a2
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    mv a4, a2
-; CHECK-NEXT:    slli a2, a2, 1
-; CHECK-NEXT:    add a2, a2, a4
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
 ; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    mv a4, a2
-; CHECK-NEXT:    slli a2, a2, 1
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v28, v0.t
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -876,7 +852,7 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfl
 ; CHECK-NEXT:  .LBB32_2:
 ; CHECK-NEXT:    vmv1r.v v0, v3
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
@@ -885,21 +861,21 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfl
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
@@ -907,7 +883,7 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfl
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
@@ -941,17 +917,17 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_commute(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v3, v0
+; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    vmv.v.x v24, a1
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    mv a3, a1
 ; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, a1, a3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    fmv.x.h a1, fa0
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a0, a1
@@ -961,20 +937,12 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_commute(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
 ; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
+; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    add a3, sp, a3
 ; CHECK-NEXT:    addi a3, a3, 16
 ; CHECK-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 3
 ; CHECK-NEXT:    mv a3, a2
@@ -982,10 +950,14 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_commute(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    add a2, a2, a3
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v28, v0.t
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v20, v0.t
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 4
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -996,7 +968,7 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_commute(<vscale x 32 x bfloat>
 ; CHECK-NEXT:  .LBB33_2:
 ; CHECK-NEXT:    vmv1r.v v0, v3
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
@@ -1005,40 +977,24 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_commute(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24, v0.t
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmadd.vv v16, v24, v8, v0.t
+; CHECK-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; CHECK-NEXT:    vmv4r.v v12, v4
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
@@ -1077,16 +1033,17 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    addi a3, a3, -1
 ; CHECK-NEXT:    and a3, a3, a4
 ; CHECK-NEXT:    csrr a4, vlenb
-; CHECK-NEXT:    slli a4, a4, 4
+; CHECK-NEXT:    slli a4, a4, 3
+; CHECK-NEXT:    mv a5, a4
+; CHECK-NEXT:    slli a4, a4, 1
+; CHECK-NEXT:    add a4, a4, a5
 ; CHECK-NEXT:    add a4, sp, a4
 ; CHECK-NEXT:    addi a4, a4, 16
 ; CHECK-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
-; CHECK-NEXT:    addi a4, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a4) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a4, vlenb
-; CHECK-NEXT:    slli a4, a4, 3
+; CHECK-NEXT:    slli a4, a4, 4
 ; CHECK-NEXT:    add a4, sp, a4
 ; CHECK-NEXT:    addi a4, a4, 16
 ; CHECK-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
@@ -1095,24 +1052,11 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    vmv.v.x v8, a2
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    mv a4, a2
-; CHECK-NEXT:    slli a2, a2, 1
-; CHECK-NEXT:    add a2, a2, a4
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
 ; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    mv a4, a2
-; CHECK-NEXT:    slli a2, a2, 1
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v28, v0.t
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -1122,7 +1066,10 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB34_2:
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
@@ -1131,16 +1078,13 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat>
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
@@ -1174,24 +1118,24 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked_commute(<vscale x 32 x
 ; CHECK-NEXT:    slli a1, a1, 5
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vmset.m v24
+; CHECK-NEXT:    vmv.v.x v0, a1
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    mv a3, a1
 ; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    add a1, a1, a2
+; CHECK-NEXT:    add a1, a1, a3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    fmv.x.h a1, fa0
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmset.m v8
-; CHECK-NEXT:    vmv.v.x v24, a1
+; CHECK-NEXT:    vs8r.v v0, (a1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v0, v8, a2
+; CHECK-NEXT:    vslidedown.vx v0, v24, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
@@ -1201,26 +1145,22 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked_commute(<vscale x 32 x
 ; CHECK-NEXT:    addi a3, a3, 16
 ; CHECK-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    mv a3, a2
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    add a2, a2, a3
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28, v0.t
+; CHECK-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v20, v0.t
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    slli a2, a2, 1
-; CHECK-NEXT:    add a2, a2, a3
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v28, v0.t
-; CHECK-NEXT:    addi a2, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -1240,15 +1180,15 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked_commute(<vscale x 32 x
 ; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
@@ -1288,11 +1228,11 @@ define <vscale x 1 x half> @vfma_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x half> %c, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x half> %v
@@ -1310,11 +1250,11 @@ define <vscale x 1 x half> @vfma_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <v
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x half> %c, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x half> %v
@@ -1334,11 +1274,11 @@ define <vscale x 1 x half> @vfma_vf_nxv1f16(<vscale x 1 x half> %va, half %b, <v
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -1386,11 +1326,11 @@ define <vscale x 1 x half> @vfma_vf_nxv1f16_unmasked(<vscale x 1 x half> %va, ha
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -1412,11 +1352,11 @@ define <vscale x 1 x half> @vfma_vf_nxv1f16_unmasked_commute(<vscale x 1 x half>
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -1439,11 +1379,11 @@ define <vscale x 2 x half> @vfma_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fma.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x half> %c, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -1461,11 +1401,11 @@ define <vscale x 2 x half> @vfma_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <v
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fma.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x half> %c, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -1485,11 +1425,11 @@ define <vscale x 2 x half> @vfma_vf_nxv2f16(<vscale x 2 x half> %va, half %b, <v
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -1537,11 +1477,11 @@ define <vscale x 2 x half> @vfma_vf_nxv2f16_unmasked(<vscale x 2 x half> %va, ha
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -1563,11 +1503,11 @@ define <vscale x 2 x half> @vfma_vf_nxv2f16_unmasked_commute(<vscale x 2 x half>
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -1590,11 +1530,11 @@ define <vscale x 4 x half> @vfma_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.fma.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x half> %c, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x half> %v
@@ -1612,11 +1552,11 @@ define <vscale x 4 x half> @vfma_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <v
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.fma.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x half> %c, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x half> %v
@@ -1636,11 +1576,11 @@ define <vscale x 4 x half> @vfma_vf_nxv4f16(<vscale x 4 x half> %va, half %b, <v
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -1660,9 +1600,9 @@ define <vscale x 4 x half> @vfma_vf_nxv4f16_commute(<vscale x 4 x half> %va, hal
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -1688,11 +1628,11 @@ define <vscale x 4 x half> @vfma_vf_nxv4f16_unmasked(<vscale x 4 x half> %va, ha
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -1714,11 +1654,11 @@ define <vscale x 4 x half> @vfma_vf_nxv4f16_unmasked_commute(<vscale x 4 x half>
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -1741,11 +1681,11 @@ define <vscale x 8 x half> @vfma_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.fma.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -1763,11 +1703,11 @@ define <vscale x 8 x half> @vfma_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <v
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v12, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.fma.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x half> %v
@@ -1787,11 +1727,11 @@ define <vscale x 8 x half> @vfma_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <v
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1811,9 +1751,9 @@ define <vscale x 8 x half> @vfma_vf_nxv8f16_commute(<vscale x 8 x half> %va, hal
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -1839,11 +1779,11 @@ define <vscale x 8 x half> @vfma_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, ha
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1865,11 +1805,11 @@ define <vscale x 8 x half> @vfma_vf_nxv8f16_unmasked_commute(<vscale x 8 x half>
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1890,15 +1830,13 @@ define <vscale x 16 x half> @vfma_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-LABEL: vfma_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v4, v12
-; ZVFHMIN-NEXT:    vmv4r.v v20, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.fma.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x half> %c, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -1916,11 +1854,11 @@ define <vscale x 16 x half> @vfma_vv_nxv16f16_unmasked(<vscale x 16 x half> %va,
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.fma.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x half> %c, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x half> %v
@@ -1935,18 +1873,16 @@ define <vscale x 16 x half> @vfma_vf_nxv16f16(<vscale x 16 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: vfma_vf_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v16, v12
-; ZVFHMIN-NEXT:    vmv4r.v v20, v8
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
-; ZVFHMIN-NEXT:    vmv.v.x v4, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1966,9 +1902,9 @@ define <vscale x 16 x half> @vfma_vf_nxv16f16_commute(<vscale x 16 x half> %va,
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -1994,11 +1930,11 @@ define <vscale x 16 x half> @vfma_vf_nxv16f16_unmasked(<vscale x 16 x half> %va,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2020,11 +1956,11 @@ define <vscale x 16 x half> @vfma_vf_nxv16f16_unmasked_commute(<vscale x 16 x ha
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -2077,8 +2013,6 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
@@ -2092,10 +2026,8 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -2182,15 +2114,15 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vl8re16.v v16, (a0)
+; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v24
+; ZVFHMIN-NEXT:    vmset.m v16
 ; ZVFHMIN-NEXT:    slli a0, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a1, a0
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v24, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
 ; ZVFHMIN-NEXT:    sltu a2, a1, a3
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
@@ -2198,11 +2130,9 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    slli a3, a3, 3
 ; ZVFHMIN-NEXT:    add a3, sp, a3
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
@@ -2216,10 +2146,8 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -2297,16 +2225,17 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    slli a4, a4, 4
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a4, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 4
+; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    mv a5, a4
+; ZVFHMIN-NEXT:    slli a4, a4, 1
+; ZVFHMIN-NEXT:    add a4, a4, a5
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
@@ -2315,24 +2244,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -2343,7 +2259,7 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b,
 ; ZVFHMIN-NEXT:  .LBB68_2:
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v3
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
@@ -2352,21 +2268,21 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    mv a1, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
@@ -2374,7 +2290,7 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
@@ -2414,17 +2330,17 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v3, v0
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    csrr a2, vlenb
+; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    mv a2, a1
+; ZVFHMIN-NEXT:    mv a3, a1
 ; ZVFHMIN-NEXT:    slli a1, a1, 1
-; ZVFHMIN-NEXT:    add a1, a1, a2
+; ZVFHMIN-NEXT:    add a1, a1, a3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
+; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -2434,20 +2350,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
-; ZVFHMIN-NEXT:    slli a3, a3, 4
+; ZVFHMIN-NEXT:    slli a3, a3, 3
 ; ZVFHMIN-NEXT:    add a3, sp, a3
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -2455,10 +2363,14 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    csrr a2, vlenb
+; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    add a2, sp, a2
+; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -2469,7 +2381,7 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:  .LBB69_2:
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v3
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
+; ZVFHMIN-NEXT:    slli a1, a1, 3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
@@ -2478,40 +2390,24 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    mv a1, a0
 ; ZVFHMIN-NEXT:    slli a0, a0, 1
 ; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vmv4r.v v12, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
@@ -2556,16 +2452,17 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 4
+; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    mv a5, a4
+; ZVFHMIN-NEXT:    slli a4, a4, 1
+; ZVFHMIN-NEXT:    add a4, a4, a5
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a4, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    slli a4, a4, 4
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
@@ -2574,24 +2471,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -2601,7 +2485,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB70_2:
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    mv a2, a1
+; ZVFHMIN-NEXT:    slli a1, a1, 1
+; ZVFHMIN-NEXT:    add a1, a1, a2
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
@@ -2610,16 +2497,13 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va,
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
@@ -2659,24 +2543,24 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha
 ; ZVFHMIN-NEXT:    slli a1, a1, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    csrr a2, vlenb
+; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vmset.m v24
+; ZVFHMIN-NEXT:    vmv.v.x v0, a1
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    mv a2, a1
+; ZVFHMIN-NEXT:    mv a3, a1
 ; ZVFHMIN-NEXT:    slli a1, a1, 1
-; ZVFHMIN-NEXT:    add a1, a1, a2
+; ZVFHMIN-NEXT:    add a1, a1, a3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v8
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vs8r.v v0, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v8, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v24, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
@@ -2686,26 +2570,22 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
+; ZVFHMIN-NEXT:    mv a3, a2
+; ZVFHMIN-NEXT:    slli a2, a2, 1
+; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a3, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -2725,15 +2605,15 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    mv a1, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
@@ -3660,11 +3540,11 @@ define <vscale x 1 x half> @vfmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> %m, i32 %evl)
   %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x half> %negc, <vscale x 1 x i1> %m, i32 %evl)
@@ -3685,11 +3565,11 @@ define <vscale x 1 x half> @vfmsub_vv_nxv1f16_unmasked(<vscale x 1 x half> %va,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x half> %negc, <vscale x 1 x i1> splat (i1 true), i32 %evl)
@@ -3712,11 +3592,11 @@ define <vscale x 1 x half> @vfmsub_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -3770,11 +3650,11 @@ define <vscale x 1 x half> @vfmsub_vf_nxv1f16_unmasked(<vscale x 1 x half> %va,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v9
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -3799,11 +3679,11 @@ define <vscale x 1 x half> @vfmsub_vf_nxv1f16_unmasked_commute(<vscale x 1 x hal
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v9
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -3938,11 +3818,11 @@ define <vscale x 1 x half> @vfnmadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -4000,11 +3880,11 @@ define <vscale x 1 x half> @vfnmadd_vf_nxv1f16_unmasked(<vscale x 1 x half> %va,
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -4031,11 +3911,11 @@ define <vscale x 1 x half> @vfnmadd_vf_nxv1f16_unmasked_commute(<vscale x 1 x ha
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -4352,11 +4232,11 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16_unmasked(<vscale x 1 x half> %va,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -4381,11 +4261,11 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16_unmasked_commute(<vscale x 1 x ha
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -4527,11 +4407,11 @@ define <vscale x 2 x half> @vfmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %negc = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %c, <vscale x 2 x i1> %m, i32 %evl)
   %v = call <vscale x 2 x half> @llvm.vp.fma.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x half> %negc, <vscale x 2 x i1> %m, i32 %evl)
@@ -4552,11 +4432,11 @@ define <vscale x 2 x half> @vfmsub_vv_nxv2f16_unmasked(<vscale x 2 x half> %va,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %negc = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %c, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   %v = call <vscale x 2 x half> @llvm.vp.fma.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x half> %negc, <vscale x 2 x i1> splat (i1 true), i32 %evl)
@@ -4579,11 +4459,11 @@ define <vscale x 2 x half> @vfmsub_vf_nxv2f16(<vscale x 2 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -4637,11 +4517,11 @@ define <vscale x 2 x half> @vfmsub_vf_nxv2f16_unmasked(<vscale x 2 x half> %va,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v9
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -4666,11 +4546,11 @@ define <vscale x 2 x half> @vfmsub_vf_nxv2f16_unmasked_commute(<vscale x 2 x hal
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v9
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -4805,11 +4685,11 @@ define <vscale x 2 x half> @vfnmadd_vf_nxv2f16(<vscale x 2 x half> %va, half %b,
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -4867,11 +4747,11 @@ define <vscale x 2 x half> @vfnmadd_vf_nxv2f16_unmasked(<vscale x 2 x half> %va,
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -4898,11 +4778,11 @@ define <vscale x 2 x half> @vfnmadd_vf_nxv2f16_unmasked_commute(<vscale x 2 x ha
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -5219,11 +5099,11 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16_unmasked(<vscale x 2 x half> %va,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -5248,11 +5128,11 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16_unmasked_commute(<vscale x 2 x ha
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -5391,14 +5271,14 @@ define <vscale x 4 x half> @vfmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a1, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v11, v10, a1, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %negc = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %c, <vscale x 4 x i1> %m, i32 %evl)
   %v = call <vscale x 4 x half> @llvm.vp.fma.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x half> %negc, <vscale x 4 x i1> %m, i32 %evl)
@@ -5419,11 +5299,11 @@ define <vscale x 4 x half> @vfmsub_vv_nxv4f16_unmasked(<vscale x 4 x half> %va,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %negc = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %c, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   %v = call <vscale x 4 x half> @llvm.vp.fma.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x half> %negc, <vscale x 4 x i1> splat (i1 true), i32 %evl)
@@ -5441,16 +5321,16 @@ define <vscale x 4 x half> @vfmsub_vf_nxv4f16(<vscale x 4 x half> %va, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v14, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -5499,12 +5379,12 @@ define <vscale x 4 x half> @vfmsub_vf_nxv4f16_unmasked(<vscale x 4 x half> %va,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vxor.vx v12, v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -5528,12 +5408,12 @@ define <vscale x 4 x half> @vfmsub_vf_nxv4f16_unmasked_commute(<vscale x 4 x hal
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vxor.vx v12, v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -5559,8 +5439,8 @@ define <vscale x 4 x half> @vfnmadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a1, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v11, v10, a1, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -5586,8 +5466,8 @@ define <vscale x 4 x half> @vfnmadd_vv_nxv4f16_commuted(<vscale x 4 x half> %va,
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a1, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v11, v10, a1, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -5613,8 +5493,8 @@ define <vscale x 4 x half> @vfnmadd_vv_nxv4f16_unmasked(<vscale x 4 x half> %va,
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vxor.vx v11, v10, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -5640,8 +5520,8 @@ define <vscale x 4 x half> @vfnmadd_vv_nxv4f16_unmasked_commuted(<vscale x 4 x h
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vxor.vx v11, v10, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -5666,13 +5546,13 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v12, v8, a0, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v10, v9, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v11, v8, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -5697,13 +5577,13 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_commute(<vscale x 4 x half> %va,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v12, v8, a0, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v10, v9, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v11, v8, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v10, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -5728,13 +5608,13 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_unmasked(<vscale x 4 x half> %va,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v12, v8, a0
-; ZVFHMIN-NEXT:    vxor.vx v10, v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vxor.vx v11, v8, a0
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -5759,13 +5639,13 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_unmasked_commute(<vscale x 4 x ha
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v12, v8, a0
-; ZVFHMIN-NEXT:    vxor.vx v10, v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vxor.vx v11, v8, a0
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -5792,10 +5672,10 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat(<vscale x 4 x half> %va
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v14, v10, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v13, v10, a0, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v14, v10, v0.t
@@ -5823,10 +5703,10 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat_commute(<vscale x 4 x h
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v14, v10, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v13, v10, a0, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10, v0.t
@@ -5855,9 +5735,9 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat_unmasked(<vscale x 4 x
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
-; ZVFHMIN-NEXT:    vxor.vx v14, v10, a0
+; ZVFHMIN-NEXT:    vxor.vx v13, v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
@@ -5886,9 +5766,9 @@ define <vscale x 4 x half> @vfnmadd_vf_nxv4f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
-; ZVFHMIN-NEXT:    vxor.vx v14, v10, a0
+; ZVFHMIN-NEXT:    vxor.vx v13, v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
@@ -5916,8 +5796,8 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a1, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v11, v10, a1, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -5943,8 +5823,8 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16_commuted(<vscale x 4 x half> %va,
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a1, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v11, v10, a1, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -5970,8 +5850,8 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16_unmasked(<vscale x 4 x half> %va,
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vxor.vx v11, v10, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -5997,8 +5877,8 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16_unmasked_commuted(<vscale x 4 x h
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vxor.vx v11, v10, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -6023,16 +5903,16 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16(<vscale x 4 x half> %va, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -6052,14 +5932,14 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_commute(<vscale x 4 x half> %va,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v10, v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v10, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -6081,12 +5961,12 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_unmasked(<vscale x 4 x half> %va,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vxor.vx v12, v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vxor.vx v9, v8, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -6110,12 +5990,12 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_unmasked_commute(<vscale x 4 x ha
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v13, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vxor.vx v12, v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vxor.vx v9, v8, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -6141,8 +6021,8 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat(<vscale x 4 x half> %va
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v11, v10, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -6170,8 +6050,8 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16_neg_splat_commute(<vscale x 4 x h
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v11, v10, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -6258,14 +6138,14 @@ define <vscale x 8 x half> @vfmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a1, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v14, v12, a1, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %negc = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %c, <vscale x 8 x i1> %m, i32 %evl)
   %v = call <vscale x 8 x half> @llvm.vp.fma.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x half> %negc, <vscale x 8 x i1> %m, i32 %evl)
@@ -6286,11 +6166,11 @@ define <vscale x 8 x half> @vfmsub_vv_nxv8f16_unmasked(<vscale x 8 x half> %va,
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %negc = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %c, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   %v = call <vscale x 8 x half> @llvm.vp.fma.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x half> %negc, <vscale x 8 x i1> splat (i1 true), i32 %evl)
@@ -6308,16 +6188,16 @@ define <vscale x 8 x half> @vfmsub_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v20, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v24, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6366,12 +6246,12 @@ define <vscale x 8 x half> @vfmsub_vf_nxv8f16_unmasked(<vscale x 8 x half> %va,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v18, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vxor.vx v16, v10, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -6395,12 +6275,12 @@ define <vscale x 8 x half> @vfmsub_vf_nxv8f16_unmasked_commute(<vscale x 8 x hal
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v18, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vxor.vx v16, v10, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -6426,8 +6306,8 @@ define <vscale x 8 x half> @vfnmadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a1, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v14, v12, a1, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -6453,8 +6333,8 @@ define <vscale x 8 x half> @vfnmadd_vv_nxv8f16_commuted(<vscale x 8 x half> %va,
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a1, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v14, v12, a1, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -6480,8 +6360,8 @@ define <vscale x 8 x half> @vfnmadd_vv_nxv8f16_unmasked(<vscale x 8 x half> %va,
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vxor.vx v14, v12, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -6507,8 +6387,8 @@ define <vscale x 8 x half> @vfnmadd_vv_nxv8f16_unmasked_commuted(<vscale x 8 x h
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vxor.vx v14, v12, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -6533,13 +6413,13 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v18, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v16, v8, a0, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v14, v8, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -6564,13 +6444,13 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_commute(<vscale x 8 x half> %va,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v18, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v16, v8, a0, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v14, v8, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -6595,13 +6475,13 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_unmasked(<vscale x 8 x half> %va,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v18, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v16, v8, a0
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vxor.vx v14, v8, a0
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -6626,13 +6506,13 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_unmasked_commute(<vscale x 8 x ha
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v18, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v16, v8, a0
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vxor.vx v14, v8, a0
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -6659,10 +6539,10 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat(<vscale x 8 x half> %va
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v20, v12, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v18, v12, a0, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v20, v12, v0.t
@@ -6690,10 +6570,10 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat_commute(<vscale x 8 x h
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v20, v12, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v18, v12, a0, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12, v0.t
@@ -6722,9 +6602,9 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat_unmasked(<vscale x 8 x
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
-; ZVFHMIN-NEXT:    vxor.vx v20, v12, a0
+; ZVFHMIN-NEXT:    vxor.vx v18, v12, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
@@ -6753,9 +6633,9 @@ define <vscale x 8 x half> @vfnmadd_vf_nxv8f16_neg_splat_unmasked_commute(<vscal
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
-; ZVFHMIN-NEXT:    vxor.vx v20, v12, a0
+; ZVFHMIN-NEXT:    vxor.vx v18, v12, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
@@ -6783,8 +6663,8 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a1, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v14, v12, a1, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -6810,8 +6690,8 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16_commuted(<vscale x 8 x half> %va,
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a1, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v14, v12, a1, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -6837,8 +6717,8 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16_unmasked(<vscale x 8 x half> %va,
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vxor.vx v14, v12, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -6864,8 +6744,8 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16_unmasked_commuted(<vscale x 8 x h
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a1
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16
+; ZVFHMIN-NEXT:    vxor.vx v14, v12, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -6890,16 +6770,16 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v18, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v24, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -6919,14 +6799,14 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_commute(<vscale x 8 x half> %va,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v18, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v12, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -6948,12 +6828,12 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_unmasked(<vscale x 8 x half> %va,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v18, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vxor.vx v16, v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vxor.vx v10, v8, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -6977,12 +6857,12 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_unmasked_commute(<vscale x 8 x ha
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v18, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vxor.vx v16, v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vxor.vx v10, v8, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -7008,8 +6888,8 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat(<vscale x 8 x half> %va
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v14, v12, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -7037,8 +6917,8 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16_neg_splat_commute(<vscale x 8 x h
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v16, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v14, v12, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -7123,18 +7003,16 @@ define <vscale x 16 x half> @vfmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ;
 ; ZVFHMIN-LABEL: vfmsub_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v4, v12
-; ZVFHMIN-NEXT:    vmv4r.v v20, v8
-; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v16, v16, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v20, v16, a1, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %negc = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %c, <vscale x 16 x i1> %m, i32 %evl)
   %v = call <vscale x 16 x half> @llvm.vp.fma.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x half> %negc, <vscale x 16 x i1> %m, i32 %evl)
@@ -7155,11 +7033,11 @@ define <vscale x 16 x half> @vfmsub_vv_nxv16f16_unmasked(<vscale x 16 x half> %v
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v16, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %negc = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %c, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   %v = call <vscale x 16 x half> @llvm.vp.fma.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x half> %negc, <vscale x 16 x i1> splat (i1 true), i32 %evl)
@@ -7175,19 +7053,18 @@ define <vscale x 16 x half> @vfmsub_vf_nxv16f16(<vscale x 16 x half> %va, half %
 ;
 ; ZVFHMIN-LABEL: vfmsub_vf_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v16, v8
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v4, a0
+; ZVFHMIN-NEXT:    vmv.v.x v4, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v20, v12, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -7236,12 +7113,12 @@ define <vscale x 16 x half> @vfmsub_vf_nxv16f16_unmasked(<vscale x 16 x half> %v
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v28, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vxor.vx v24, v12, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -7265,12 +7142,12 @@ define <vscale x 16 x half> @vfmsub_vf_nxv16f16_unmasked_commute(<vscale x 16 x
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v28, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vxor.vx v24, v12, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -7294,15 +7171,15 @@ define <vscale x 16 x half> @vfnmadd_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; ZVFHMIN-LABEL: vfnmadd_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v4, v8
+; ZVFHMIN-NEXT:    vmv4r.v v20, v8
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v12, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v28, v12, a0, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v12, v16, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -7322,13 +7199,13 @@ define <vscale x 16 x half> @vfnmadd_vv_nxv16f16_commuted(<vscale x 16 x half> %
 ; ZVFHMIN-LABEL: vfnmadd_vv_nxv16f16_commuted:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v4, v8
+; ZVFHMIN-NEXT:    vmv4r.v v20, v8
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v20, v12, a0, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v16, v16, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v28, v12, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v12, v16, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -7352,8 +7229,8 @@ define <vscale x 16 x half> @vfnmadd_vv_nxv16f16_unmasked(<vscale x 16 x half> %
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
-; ZVFHMIN-NEXT:    vxor.vx v24, v16, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vxor.vx v20, v16, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -7379,8 +7256,8 @@ define <vscale x 16 x half> @vfnmadd_vv_nxv16f16_unmasked_commuted(<vscale x 16
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
-; ZVFHMIN-NEXT:    vxor.vx v24, v16, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vxor.vx v20, v16, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -7405,13 +7282,13 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16(<vscale x 16 x half> %va, half
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v28, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v24, v8, a0, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v20, v8, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -7436,13 +7313,13 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_commute(<vscale x 16 x half> %v
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v28, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v24, v8, a0, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v20, v8, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -7467,13 +7344,13 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_unmasked(<vscale x 16 x half> %
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v28, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v24, v8, a0
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT:    vxor.vx v20, v8, a0
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -7498,13 +7375,13 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_unmasked_commute(<vscale x 16 x
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v28, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v24, v8, a0
-; ZVFHMIN-NEXT:    vxor.vx v16, v12, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT:    vxor.vx v20, v8, a0
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -7528,17 +7405,17 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat(<vscale x 16 x half>
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v4, v8
+; ZVFHMIN-NEXT:    vmv4r.v v20, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a0
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v16, v8, a0, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -7560,15 +7437,15 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat_commute(<vscale x 16
 ; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16_neg_splat_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v4, v8
+; ZVFHMIN-NEXT:    vmv4r.v v20, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a0
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v16, v8, a0, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v20, v12, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -7596,9 +7473,9 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat_unmasked(<vscale x 16
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
-; ZVFHMIN-NEXT:    vxor.vx v4, v16, a0
+; ZVFHMIN-NEXT:    vxor.vx v28, v16, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
@@ -7627,9 +7504,9 @@ define <vscale x 16 x half> @vfnmadd_vf_nxv16f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
-; ZVFHMIN-NEXT:    vxor.vx v4, v16, a0
+; ZVFHMIN-NEXT:    vxor.vx v28, v16, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
@@ -7655,15 +7532,15 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; ZVFHMIN-LABEL: vfnmsub_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v4, v8
+; ZVFHMIN-NEXT:    vmv4r.v v20, v8
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v12, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v28, v12, a0, v0.t
 ; ZVFHMIN-NEXT:    vxor.vx v12, v16, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -7683,13 +7560,13 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16_commuted(<vscale x 16 x half> %
 ; ZVFHMIN-LABEL: vfnmsub_vv_nxv16f16_commuted:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v4, v8
+; ZVFHMIN-NEXT:    vmv4r.v v20, v8
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v20, v12, a0, v0.t
-; ZVFHMIN-NEXT:    vxor.vx v16, v16, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v28, v12, a0, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v12, v16, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -7713,8 +7590,8 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16_unmasked(<vscale x 16 x half> %
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
-; ZVFHMIN-NEXT:    vxor.vx v24, v16, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vxor.vx v20, v16, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -7740,8 +7617,8 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16_unmasked_commuted(<vscale x 16
 ; ZVFHMIN-NEXT:    lui a1, 8
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a1
-; ZVFHMIN-NEXT:    vxor.vx v24, v16, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vxor.vx v20, v16, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
@@ -7764,19 +7641,18 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16(<vscale x 16 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v16, v12
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v4, a0
+; ZVFHMIN-NEXT:    vmv.v.x v28, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v20, v8, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -7796,14 +7672,14 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_commute(<vscale x 16 x half> %v
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v28, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -7825,12 +7701,12 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_unmasked(<vscale x 16 x half> %
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v28, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vxor.vx v24, v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT:    vxor.vx v12, v8, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -7854,12 +7730,12 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_unmasked_commute(<vscale x 16 x
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v28, a1
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vxor.vx v24, v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT:    vxor.vx v12, v8, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -7882,16 +7758,16 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat(<vscale x 16 x half>
 ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16_neg_splat:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v4, v8
+; ZVFHMIN-NEXT:    vmv4r.v v28, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a0
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -7913,14 +7789,14 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16_neg_splat_commute(<vscale x 16
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv4r.v v16, v12
-; ZVFHMIN-NEXT:    vmv4r.v v4, v8
+; ZVFHMIN-NEXT:    vmv4r.v v20, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a0
 ; ZVFHMIN-NEXT:    lui a0, 8
-; ZVFHMIN-NEXT:    vxor.vx v20, v8, a0, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v12, v8, a0, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -8020,7 +7896,6 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vmv8r.v v16, v8
 ; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
 ; ZVFHMIN-NEXT:    lui a2, 8
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
@@ -8030,7 +7905,7 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vslidedown.vx v6, v0, a3
 ; ZVFHMIN-NEXT:    sltu a3, a1, a4
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v8, v24, a2, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v16, v24, a2, v0.t
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
@@ -8038,17 +7913,15 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -8057,13 +7930,11 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v4, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v4, v8, v0.t
 ; ZVFHMIN-NEXT:    bltu a1, a0, .LBB280_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a1, a0
@@ -8139,46 +8010,36 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    slli a2, a2, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a2
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a3, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vsetvli a2, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv8r.v v24, v8
-; ZVFHMIN-NEXT:    vl8re16.v v16, (a0)
+; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
 ; ZVFHMIN-NEXT:    lui a2, 8
-; ZVFHMIN-NEXT:    vmset.m v8
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
+; ZVFHMIN-NEXT:    vmset.m v7
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a3, 1
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
 ; ZVFHMIN-NEXT:    sub a4, a1, a0
 ; ZVFHMIN-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v7, a3
 ; ZVFHMIN-NEXT:    sltu a3, a1, a4
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v16, v16, a2
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
-; ZVFHMIN-NEXT:    vmv4r.v v8, v16
 ; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -8186,17 +8047,17 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
 ; ZVFHMIN-NEXT:    bltu a1, a0, .LBB281_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a1, a0
@@ -8214,8 +8075,8 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    mv a1, a0
@@ -8223,15 +8084,15 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
@@ -8276,16 +8137,17 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    slli a4, a4, 4
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a4, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 4
+; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    mv a5, a4
+; ZVFHMIN-NEXT:    slli a4, a4, 1
+; ZVFHMIN-NEXT:    add a4, a4, a5
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
@@ -8294,24 +8156,11 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -8322,7 +8171,7 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; ZVFHMIN-NEXT:  .LBB282_2:
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v3
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
@@ -8331,21 +8180,21 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    mv a1, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
@@ -8353,7 +8202,7 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half %
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
@@ -8394,18 +8243,18 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v3, v0
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    lui a2, 8
+; ZVFHMIN-NEXT:    csrr a3, vlenb
+; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    mv a2, a1
+; ZVFHMIN-NEXT:    mv a4, a1
 ; ZVFHMIN-NEXT:    slli a1, a1, 1
-; ZVFHMIN-NEXT:    add a1, a1, a2
+; ZVFHMIN-NEXT:    add a1, a1, a4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    lui a2, 8
-; ZVFHMIN-NEXT:    csrr a3, vlenb
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
+; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a2, v0.t
 ; ZVFHMIN-NEXT:    slli a1, a3, 1
@@ -8423,14 +8272,6 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -8438,10 +8279,14 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    csrr a2, vlenb
+; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    add a2, sp, a2
+; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -8461,20 +8306,11 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    mv a1, a0
@@ -8482,19 +8318,12 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vmv4r.v v12, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
@@ -8543,16 +8372,17 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    slli a4, a4, 4
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a4, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 4
+; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    mv a5, a4
+; ZVFHMIN-NEXT:    slli a4, a4, 1
+; ZVFHMIN-NEXT:    add a4, a4, a5
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
@@ -8561,24 +8391,11 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -8588,7 +8405,7 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB284_2:
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
@@ -8597,16 +8414,16 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    mv a1, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
@@ -8647,20 +8464,20 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
 ; ZVFHMIN-NEXT:    slli a1, a1, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    mv a2, a1
-; ZVFHMIN-NEXT:    slli a1, a1, 1
-; ZVFHMIN-NEXT:    add a1, a1, a2
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    lui a2, 8
 ; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v24
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
+; ZVFHMIN-NEXT:    vmv.v.x v0, a1
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    mv a4, a1
+; ZVFHMIN-NEXT:    slli a1, a1, 1
+; ZVFHMIN-NEXT:    add a1, a1, a4
+; ZVFHMIN-NEXT:    add a1, sp, a1
+; ZVFHMIN-NEXT:    addi a1, a1, 16
+; ZVFHMIN-NEXT:    vs8r.v v0, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a2
 ; ZVFHMIN-NEXT:    slli a1, a3, 1
@@ -8678,14 +8495,6 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -8693,10 +8502,14 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    csrr a2, vlenb
+; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    add a2, sp, a2
+; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -8715,16 +8528,16 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    mv a1, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
@@ -8796,23 +8609,21 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    slli a5, a5, 4
 ; ZVFHMIN-NEXT:    add a5, sp, a5
 ; ZVFHMIN-NEXT:    addi a5, a5, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v24, v16, a2, v0.t
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
+; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
@@ -8824,10 +8635,8 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -8839,7 +8648,7 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:  .LBB286_2:
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v3
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
@@ -8848,13 +8657,13 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
@@ -8868,7 +8677,7 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
@@ -8910,18 +8719,12 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v3, v0
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a3, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmv8r.v v24, v16
+; ZVFHMIN-NEXT:    vmv8r.v v16, v8
 ; ZVFHMIN-NEXT:    vl8re16.v v8, (a0)
 ; ZVFHMIN-NEXT:    lui a2, 8
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
-; ZVFHMIN-NEXT:    vxor.vx v16, v16, a2, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2, v0.t
 ; ZVFHMIN-NEXT:    slli a0, a3, 1
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
 ; ZVFHMIN-NEXT:    sub a4, a1, a0
@@ -8939,15 +8742,13 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -8955,10 +8756,8 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -9039,18 +8838,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    slli a2, a2, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a2
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a3, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
 ; ZVFHMIN-NEXT:    lui a2, 8
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v8
+; ZVFHMIN-NEXT:    vmset.m v7
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a2
@@ -9058,25 +8849,25 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
 ; ZVFHMIN-NEXT:    sub a4, a1, a0
 ; ZVFHMIN-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v7, a3
 ; ZVFHMIN-NEXT:    sltu a3, a1, a4
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v8, v24, a2
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
+; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
@@ -9086,31 +8877,34 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    csrr a2, vlenb
+; ZVFHMIN-NEXT:    slli a2, a2, 3
+; ZVFHMIN-NEXT:    add a2, sp, a2
+; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
 ; ZVFHMIN-NEXT:    bltu a1, a0, .LBB288_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a1, a0
 ; ZVFHMIN-NEXT:  .LBB288_2:
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
@@ -9122,7 +8916,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v0
@@ -9157,46 +8954,56 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked_commuted(<vscale x 32
 ; ZVFHMIN-NEXT:    slli a2, a2, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a2
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a3, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e8, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv8r.v v0, v16
+; ZVFHMIN-NEXT:    vmv8r.v v16, v8
 ; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
 ; ZVFHMIN-NEXT:    lui a2, 8
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v8
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    mv a3, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a0, a0, a3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v16, v16, a2
+; ZVFHMIN-NEXT:    vxor.vx v8, v0, a2
 ; ZVFHMIN-NEXT:    slli a0, a3, 1
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
 ; ZVFHMIN-NEXT:    sub a4, a1, a0
+; ZVFHMIN-NEXT:    csrr a5, vlenb
+; ZVFHMIN-NEXT:    slli a5, a5, 3
+; ZVFHMIN-NEXT:    mv a6, a5
+; ZVFHMIN-NEXT:    slli a5, a5, 1
+; ZVFHMIN-NEXT:    add a5, a5, a6
+; ZVFHMIN-NEXT:    add a5, sp, a5
+; ZVFHMIN-NEXT:    addi a5, a5, 16
+; ZVFHMIN-NEXT:    vl1r.v v7, (a5) # vscale x 8-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v7, a3
 ; ZVFHMIN-NEXT:    sltu a3, a1, a4
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v8, v24, a2
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
+; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    addi a2, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -9204,31 +9011,34 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked_commuted(<vscale x 32
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    csrr a2, vlenb
+; ZVFHMIN-NEXT:    slli a2, a2, 3
+; ZVFHMIN-NEXT:    add a2, sp, a2
+; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
 ; ZVFHMIN-NEXT:    bltu a1, a0, .LBB289_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a1, a0
 ; ZVFHMIN-NEXT:  .LBB289_2:
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
@@ -9240,7 +9050,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked_commuted(<vscale x 32
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v24
@@ -9306,14 +9119,12 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -9321,12 +9132,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v4, v16, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB290_2
@@ -9438,8 +9247,6 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_commute(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
@@ -9453,15 +9260,12 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_commute(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmv.v.v v4, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v4, v16, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB291_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
@@ -9471,11 +9275,11 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_commute(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    slli a1, a1, 3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
@@ -9489,26 +9293,16 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_commute(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
-; ZVFHMIN-NEXT:    vmv.v.v v16, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vmv4r.v v12, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add sp, sp, a0
@@ -9564,31 +9358,24 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
 ; ZVFHMIN-NEXT:    slli a4, a4, 3
-; ZVFHMIN-NEXT:    add a4, sp, a4
-; ZVFHMIN-NEXT:    addi a4, a4, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a4) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 3
 ; ZVFHMIN-NEXT:    mv a5, a4
 ; ZVFHMIN-NEXT:    slli a4, a4, 1
 ; ZVFHMIN-NEXT:    add a4, a4, a5
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmv.v.x v16, a2
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB292_2
@@ -9602,10 +9389,7 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
@@ -9616,14 +9400,14 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -9675,16 +9459,17 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked_commute(<vscale x 32 x
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    slli a4, a4, 4
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a4, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 4
+; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    mv a5, a4
+; ZVFHMIN-NEXT:    slli a4, a4, 1
+; ZVFHMIN-NEXT:    add a4, a4, a5
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
@@ -9693,24 +9478,11 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked_commute(<vscale x 32 x
 ; ZVFHMIN-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -9720,7 +9492,7 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked_commute(<vscale x 32 x
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB293_2:
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
@@ -9729,16 +9501,16 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked_commute(<vscale x 32 x
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    mv a1, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
@@ -9810,8 +9582,6 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
@@ -9825,15 +9595,12 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmv.v.v v4, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v4, v16, v0.t
 ; ZVFHMIN-NEXT:    bltu a0, a1, .LBB294_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a0, a1
@@ -9843,11 +9610,11 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    slli a1, a1, 3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
@@ -9861,26 +9628,16 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
-; ZVFHMIN-NEXT:    vmv.v.v v16, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vmv4r.v v12, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add sp, sp, a0
@@ -9942,8 +9699,6 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
@@ -9957,10 +9712,8 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -10042,14 +9795,6 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
 ; ZVFHMIN-NEXT:    slli a1, a1, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    mv a2, a1
-; ZVFHMIN-NEXT:    slli a1, a1, 1
-; ZVFHMIN-NEXT:    add a1, a1, a2
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    lui a2, 8
 ; ZVFHMIN-NEXT:    vsetvli a3, zero, e8, m4, ta, ma
@@ -10060,7 +9805,7 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a2
 ; ZVFHMIN-NEXT:    slli a1, a3, 1
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
-; ZVFHMIN-NEXT:    vxor.vx v8, v24, a2
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2
 ; ZVFHMIN-NEXT:    sub a2, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vslidedown.vx v0, v7, a3
@@ -10073,15 +9818,13 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -10089,10 +9832,8 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -10161,14 +9902,6 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-NEXT:    slli a1, a1, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    mv a2, a1
-; ZVFHMIN-NEXT:    slli a1, a1, 1
-; ZVFHMIN-NEXT:    add a1, a1, a2
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    lui a2, 8
 ; ZVFHMIN-NEXT:    vsetvli a3, zero, e8, m4, ta, ma
@@ -10179,7 +9912,7 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a2
 ; ZVFHMIN-NEXT:    slli a1, a3, 1
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
-; ZVFHMIN-NEXT:    vxor.vx v8, v24, a2
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2
 ; ZVFHMIN-NEXT:    sub a2, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vslidedown.vx v0, v7, a3
@@ -10192,15 +9925,13 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -10208,10 +9939,8 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -10312,23 +10041,21 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    slli a5, a5, 4
 ; ZVFHMIN-NEXT:    add a5, sp, a5
 ; ZVFHMIN-NEXT:    addi a5, a5, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v24, v16, a2, v0.t
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v6
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
+; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
@@ -10340,10 +10067,8 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -10355,7 +10080,7 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:  .LBB298_2:
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v3
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
@@ -10364,13 +10089,13 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
@@ -10384,7 +10109,7 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
@@ -10426,18 +10151,12 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v3, v0
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a3, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmv8r.v v24, v16
+; ZVFHMIN-NEXT:    vmv8r.v v16, v8
 ; ZVFHMIN-NEXT:    vl8re16.v v8, (a0)
 ; ZVFHMIN-NEXT:    lui a2, 8
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
-; ZVFHMIN-NEXT:    vxor.vx v16, v16, a2, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2, v0.t
 ; ZVFHMIN-NEXT:    slli a0, a3, 1
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
 ; ZVFHMIN-NEXT:    sub a4, a1, a0
@@ -10455,15 +10174,13 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -10471,10 +10188,8 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -10555,18 +10270,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    slli a2, a2, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a2
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a3, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
 ; ZVFHMIN-NEXT:    lui a2, 8
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v8
+; ZVFHMIN-NEXT:    vmset.m v7
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a2
@@ -10574,25 +10281,25 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
 ; ZVFHMIN-NEXT:    sub a4, a1, a0
 ; ZVFHMIN-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v7, a3
 ; ZVFHMIN-NEXT:    sltu a3, a1, a4
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v8, v24, a2
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
+; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
@@ -10602,31 +10309,34 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT:    csrr a2, vlenb
+; ZVFHMIN-NEXT:    slli a2, a2, 3
+; ZVFHMIN-NEXT:    add a2, sp, a2
+; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
 ; ZVFHMIN-NEXT:    bltu a1, a0, .LBB300_2
 ; ZVFHMIN-NEXT:  # %bb.1:
 ; ZVFHMIN-NEXT:    mv a1, a0
 ; ZVFHMIN-NEXT:  .LBB300_2:
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
@@ -10638,7 +10348,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v0
@@ -10673,46 +10386,56 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked_commuted(<vscale x 32
 ; ZVFHMIN-NEXT:    slli a2, a2, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a2
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a3, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a3
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e8, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv8r.v v0, v16
+; ZVFHMIN-NEXT:    vmv8r.v v16, v8
 ; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
 ; ZVFHMIN-NEXT:    lui a2, 8
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmset.m v8
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    mv a3, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a0, a0, a3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v16, v16, a2
+; ZVFHMIN-NEXT:    vxor.vx v8, v0, a2
 ; ZVFHMIN-NEXT:    slli a0, a3, 1
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
 ; ZVFHMIN-NEXT:    sub a4, a1, a0
+; ZVFHMIN-NEXT:    csrr a5, vlenb
+; ZVFHMIN-NEXT:    slli a5, a5, 3
+; ZVFHMIN-NEXT:    mv a6, a5
+; ZVFHMIN-NEXT:    slli a5, a5, 1
+; ZVFHMIN-NEXT:    add a5, a5, a6
+; ZVFHMIN-NEXT:    add a5, sp, a5
+; ZVFHMIN-NEXT:    addi a5, a5, 16
+; ZVFHMIN-NEXT:    vl1r.v v7, (a5) # vscale x 8-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v7, a3
 ; ZVFHMIN-NEXT:    sltu a3, a1, a4
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v8, v24, a2
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
+; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 4
+; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    addi a2, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -10720,31 +10443,34 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked_commuted(<vscale x 32
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    csrr a2, vlenb
+; ZVFHMIN-NEXT:    slli a2, a2, 3
+; ZVFHMIN-NEXT:    add a2, sp, a2
+; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
 ; ZVFHMIN-NEXT:    bltu a1, a0, .LBB301_2
-; ZVFHMIN-NEXT:  # %bb.1:
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:  .LBB301_2:
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:  # %bb.1:
+; ZVFHMIN-NEXT:    mv a1, a0
+; ZVFHMIN-NEXT:  .LBB301_2:
+; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
@@ -10756,7 +10482,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked_commuted(<vscale x 32
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v24
@@ -10805,16 +10534,17 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    slli a4, a4, 4
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT:    addi a4, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 4
+; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    mv a5, a4
+; ZVFHMIN-NEXT:    slli a4, a4, 1
+; ZVFHMIN-NEXT:    add a4, a4, a5
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
@@ -10823,24 +10553,11 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a2
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -10851,7 +10568,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; ZVFHMIN-NEXT:  .LBB302_2:
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v3
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
@@ -10860,21 +10577,21 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    mv a1, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
@@ -10882,7 +10599,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
@@ -10936,42 +10653,30 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    slli a4, a4, 4
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 4
+; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    mv a5, a4
+; ZVFHMIN-NEXT:    slli a4, a4, 1
+; ZVFHMIN-NEXT:    add a4, a4, a5
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    addi a4, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a2
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -10983,7 +10688,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:  .LBB303_2:
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v3
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
@@ -10992,17 +10697,6 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    mv a1, a0
 ; ZVFHMIN-NEXT:    slli a0, a0, 1
@@ -11015,11 +10709,12 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vmv4r.v v12, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
@@ -11068,16 +10763,17 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    slli a4, a4, 4
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT:    addi a4, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 4
+; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    mv a5, a4
+; ZVFHMIN-NEXT:    slli a4, a4, 1
+; ZVFHMIN-NEXT:    add a4, a4, a5
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
@@ -11086,24 +10782,11 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a2
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -11113,7 +10796,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB304_2:
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
@@ -11122,16 +10805,16 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    mv a1, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
@@ -11187,42 +10870,30 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a3, a3, a4
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    slli a4, a4, 4
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    csrr a4, vlenb
-; ZVFHMIN-NEXT:    slli a4, a4, 4
+; ZVFHMIN-NEXT:    slli a4, a4, 3
+; ZVFHMIN-NEXT:    mv a5, a4
+; ZVFHMIN-NEXT:    slli a4, a4, 1
+; ZVFHMIN-NEXT:    add a4, a4, a5
 ; ZVFHMIN-NEXT:    add a4, sp, a4
 ; ZVFHMIN-NEXT:    addi a4, a4, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    addi a4, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a2
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    mv a4, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 1
-; ZVFHMIN-NEXT:    add a2, a2, a4
-; ZVFHMIN-NEXT:    add a2, sp, a2
-; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -11232,7 +10903,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
 ; ZVFHMIN-NEXT:    mv a0, a1
 ; ZVFHMIN-NEXT:  .LBB305_2:
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
@@ -11241,16 +10912,16 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    mv a1, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 1
+; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
@@ -11292,21 +10963,13 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v3, v0
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    mv a2, a1
-; ZVFHMIN-NEXT:    slli a1, a1, 1
-; ZVFHMIN-NEXT:    add a1, a1, a2
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    lui a2, 8
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
+; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    slli a1, a3, 1
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a2, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2, v0.t
 ; ZVFHMIN-NEXT:    sub a2, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a3
@@ -11317,18 +10980,15 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    slli a3, a3, 3
 ; ZVFHMIN-NEXT:    add a3, sp, a3
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT:    vmv8r.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -11336,10 +10996,8 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -11354,22 +11012,22 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    slli a1, a1, 3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    mv a1, a0
@@ -11377,20 +11035,20 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
 ; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vmv4r.v v12, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add sp, sp, a0
@@ -11422,22 +11080,13 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv1r.v v3, v0
-; ZVFHMIN-NEXT:    vmv8r.v v24, v16
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    mv a2, a1
-; ZVFHMIN-NEXT:    slli a1, a1, 1
-; ZVFHMIN-NEXT:    add a1, a1, a2
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    lui a2, 8
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    slli a1, a3, 1
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
-; ZVFHMIN-NEXT:    vxor.vx v8, v16, a2, v0.t
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2, v0.t
 ; ZVFHMIN-NEXT:    sub a2, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a3
@@ -11448,18 +11097,15 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
 ; ZVFHMIN-NEXT:    slli a3, a3, 3
 ; ZVFHMIN-NEXT:    add a3, sp, a3
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vmv4r.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -11467,10 +11113,8 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -11551,43 +11195,29 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
 ; ZVFHMIN-NEXT:    slli a1, a1, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 3
-; ZVFHMIN-NEXT:    mv a2, a1
-; ZVFHMIN-NEXT:    slli a1, a1, 1
-; ZVFHMIN-NEXT:    add a1, a1, a2
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    lui a2, 8
 ; ZVFHMIN-NEXT:    vsetvli a3, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v8
+; ZVFHMIN-NEXT:    vmset.m v7
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    slli a1, a3, 1
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
-; ZVFHMIN-NEXT:    vxor.vx v16, v24, a2
+; ZVFHMIN-NEXT:    vxor.vx v24, v24, a2
 ; ZVFHMIN-NEXT:    sub a2, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v7, a3
 ; ZVFHMIN-NEXT:    sltu a3, a0, a2
 ; ZVFHMIN-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-NEXT:    and a2, a3, a2
-; ZVFHMIN-NEXT:    vmv4r.v v8, v16
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
 ; ZVFHMIN-NEXT:    slli a3, a3, 3
 ; ZVFHMIN-NEXT:    add a3, sp, a3
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
@@ -11595,18 +11225,14 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -11706,8 +11332,6 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    add a2, sp, a2
@@ -11721,10 +11345,8 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
 ; ZVFHMIN-NEXT:    add a2, a2, a3
 ; ZVFHMIN-NEXT:    add a2, sp, a2
 ; ZVFHMIN-NEXT:    addi a2, a2, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
-; ZVFHMIN-NEXT:    addi a2, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -15329,11 +14951,11 @@ define <vscale x 1 x half> @vfma_vv_nxv1f16_double_neg(<vscale x 1 x half> %a, <
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %nega = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
   %negb = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll
index 40d0a2b280cd1..eafed8e89e779 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll
@@ -41,11 +41,11 @@ define <vscale x 1 x bfloat> @vfmadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, <vsc
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v11, v10
+; CHECK-NEXT:    vfmadd.vv v8, v11, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %c, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -97,11 +97,11 @@ define <vscale x 4 x bfloat> @vfmadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsc
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v14, v10, v12
+; CHECK-NEXT:    vfmadd.vv v8, v10, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vd = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fma.nxv4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x bfloat> %vd
@@ -115,11 +115,11 @@ define <vscale x 4 x bfloat> @vfmadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, <vsc
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v14, v12, v10
+; CHECK-NEXT:    vfmadd.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %c, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -134,11 +134,11 @@ define <vscale x 8 x bfloat> @vfmadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsc
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v20, v16
+; CHECK-NEXT:    vfmadd.vv v8, v20, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vd = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x bfloat> %vd
@@ -150,13 +150,13 @@ define <vscale x 8 x bfloat> @vfmadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, <vsc
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v8
+; CHECK-NEXT:    vmv.v.x v18, a0
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v18
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmadd.vv v20, v16, v12
+; CHECK-NEXT:    vfmadd.vv v16, v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v20
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %c, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -168,30 +168,14 @@ define <vscale x 8 x bfloat> @vfmadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, <vsc
 define <vscale x 16 x bfloat> @vfmadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc) strictfp {
 ; CHECK-LABEL: vfmadd_vv_nxv16bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmadd.vv v24, v0, v16
+; CHECK-NEXT:    vfmadd.vv v8, v0, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vd = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fma.nxv16bf16(<vscale x 16 x bfloat> %vc, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x bfloat> %vd
@@ -205,11 +189,11 @@ define <vscale x 16 x bfloat> @vfmadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, <
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vmv.v.x v12, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmadd.vv v0, v24, v16
+; CHECK-NEXT:    vfmadd.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %c, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -267,35 +251,35 @@ define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v0, v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v28
+; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    li a1, 24
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmadd.vv v16, v8, v24
+; CHECK-NEXT:    vfmadd.vv v24, v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 5
 ; CHECK-NEXT:    add sp, sp, a0
@@ -348,24 +332,21 @@ define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v24, v16, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v20
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v4
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v4
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmadd.vv v16, v8, v0
+; CHECK-NEXT:    vfmadd.vv v16, v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
@@ -421,11 +402,11 @@ define <vscale x 1 x half> @vfmadd_vf_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -497,11 +478,11 @@ define <vscale x 4 x half> @vfmadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vd = call <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x half> %va, <vscale x 4 x half> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x half> %vd
@@ -521,11 +502,11 @@ define <vscale x 4 x half> @vfmadd_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -547,11 +528,11 @@ define <vscale x 8 x half> @vfmadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v20, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v20, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %vc, <vscale x 8 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x half> %vd
@@ -569,13 +550,13 @@ define <vscale x 8 x half> @vfmadd_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
+; ZVFHMIN-NEXT:    vmv.v.x v18, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -594,30 +575,14 @@ define <vscale x 16 x half> @vfmadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ;
 ; ZVFHMIN-LABEL: vfmadd_vv_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 2
-; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 2
-; ZVFHMIN-NEXT:    add sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT:    addi sp, sp, 16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vd = call <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half> %vc, <vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x half> %vd
@@ -637,11 +602,11 @@ define <vscale x 16 x half> @vfmadd_vf_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -707,35 +672,35 @@ define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    li a1, 24
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add sp, sp, a0
@@ -794,24 +759,21 @@ define <vscale x 32 x half> @vfmadd_vf_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v0
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
index ba3ffc502f461..8134f44557a40 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
@@ -17,11 +17,11 @@ define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsc
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v10, v11
+; CHECK-NEXT:    vfmadd.vv v8, v10, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc)
   ret <vscale x 1 x bfloat> %vd
@@ -69,11 +69,11 @@ define <vscale x 2 x bfloat> @vfmadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsc
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v9, v11
+; CHECK-NEXT:    vfmadd.vv v8, v9, v11
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vd = call <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x bfloat> %vb)
   ret <vscale x 2 x bfloat> %vd
@@ -85,11 +85,11 @@ define <vscale x 2 x bfloat> @vfmadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, <vsc
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmadd.vf v11, fa5, v10
+; CHECK-NEXT:    vfmadd.vf v8, fa5, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v11
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %c, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -140,12 +140,12 @@ define <vscale x 8 x bfloat> @vfmadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsc
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmadd.vv v24, v20, v16
+; CHECK-NEXT:    vfmadd.vv v20, v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v20
 ; CHECK-NEXT:    ret
   %vd = call <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x bfloat> %va)
   ret <vscale x 8 x bfloat> %vd
@@ -157,11 +157,11 @@ define <vscale x 8 x bfloat> @vfmadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, <vsc
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmadd.vf v16, fa5, v12
+; CHECK-NEXT:    vfmadd.vf v8, fa5, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %c, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -260,18 +260,18 @@ define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <
 ; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFH-NEXT:    vfmadd.vv v0, v16, v24
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; ZVFH-NEXT:    csrr a0, vlenb
 ; ZVFH-NEXT:    slli a0, a0, 4
 ; ZVFH-NEXT:    add a0, sp, a0
 ; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFH-NEXT:    csrr a0, vlenb
 ; ZVFH-NEXT:    slli a0, a0, 3
 ; ZVFH-NEXT:    add a0, sp, a0
 ; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v28
+; ZVFH-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; ZVFH-NEXT:    csrr a0, vlenb
 ; ZVFH-NEXT:    slli a0, a0, 3
 ; ZVFH-NEXT:    mv a1, a0
@@ -279,18 +279,18 @@ define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <
 ; ZVFH-NEXT:    add a0, a0, a1
 ; ZVFH-NEXT:    add a0, sp, a0
 ; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v28
+; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v20
 ; ZVFH-NEXT:    csrr a0, vlenb
 ; ZVFH-NEXT:    slli a0, a0, 4
 ; ZVFH-NEXT:    add a0, sp, a0
 ; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFH-NEXT:    vfmadd.vv v16, v8, v24
+; ZVFH-NEXT:    vfmadd.vv v24, v8, v16
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v0
-; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v24
 ; ZVFH-NEXT:    csrr a0, vlenb
 ; ZVFH-NEXT:    slli a0, a0, 5
 ; ZVFH-NEXT:    add sp, sp, a0
@@ -347,35 +347,35 @@ define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v28
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    li a1, 24
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v28
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v20
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v0
-; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v24
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add sp, sp, a0
@@ -393,28 +393,27 @@ define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <
 ; ZVFH-NEXT:    addi sp, sp, -16
 ; ZVFH-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    mv a1, a0
-; ZVFH-NEXT:    slli a0, a0, 2
-; ZVFH-NEXT:    add a0, a0, a1
+; ZVFH-NEXT:    slli a0, a0, 5
 ; ZVFH-NEXT:    sub sp, sp, a0
-; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vmv8r.v v0, v16
 ; ZVFH-NEXT:    addi a0, sp, 16
 ; ZVFH-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vmv8r.v v16, v8
 ; ZVFH-NEXT:    fmv.x.h a0, fa0
-; ZVFH-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v16
 ; ZVFH-NEXT:    csrr a1, vlenb
 ; ZVFH-NEXT:    slli a1, a1, 4
 ; ZVFH-NEXT:    add a1, sp, a1
 ; ZVFH-NEXT:    addi a1, a1, 16
 ; ZVFH-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v16
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v0
 ; ZVFH-NEXT:    csrr a1, vlenb
 ; ZVFH-NEXT:    slli a1, a1, 3
 ; ZVFH-NEXT:    add a1, sp, a1
 ; ZVFH-NEXT:    addi a1, a1, 16
-; ZVFH-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; ZVFH-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
 ; ZVFH-NEXT:    vmv.v.x v24, a0
 ; ZVFH-NEXT:    csrr a0, vlenb
@@ -434,12 +433,7 @@ define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <
 ; ZVFH-NEXT:    addi a0, a0, 16
 ; ZVFH-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v0
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 5
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v0
 ; ZVFH-NEXT:    csrr a0, vlenb
 ; ZVFH-NEXT:    slli a0, a0, 4
 ; ZVFH-NEXT:    add a0, sp, a0
@@ -450,23 +444,13 @@ define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <
 ; ZVFH-NEXT:    add a0, sp, a0
 ; ZVFH-NEXT:    addi a0, a0, 16
 ; ZVFH-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 5
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFH-NEXT:    vfmadd.vv v16, v24, v0
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 5
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vfmadd.vv v8, v24, v0
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v20
 ; ZVFH-NEXT:    addi a0, sp, 16
 ; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v20
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v20
 ; ZVFH-NEXT:    csrr a0, vlenb
 ; ZVFH-NEXT:    slli a0, a0, 3
 ; ZVFH-NEXT:    mv a1, a0
@@ -475,22 +459,14 @@ define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <
 ; ZVFH-NEXT:    add a0, sp, a0
 ; ZVFH-NEXT:    addi a0, a0, 16
 ; ZVFH-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v4
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v0, v4
 ; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFH-NEXT:    vfmadd.vv v16, v8, v24
-; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 5
-; ZVFH-NEXT:    add a0, sp, a0
-; ZVFH-NEXT:    addi a0, a0, 16
-; ZVFH-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vfmadd.vv v0, v16, v24
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v24
-; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v8
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v0
 ; ZVFH-NEXT:    csrr a0, vlenb
-; ZVFH-NEXT:    slli a0, a0, 3
-; ZVFH-NEXT:    mv a1, a0
-; ZVFH-NEXT:    slli a0, a0, 2
-; ZVFH-NEXT:    add a0, a0, a1
+; ZVFH-NEXT:    slli a0, a0, 5
 ; ZVFH-NEXT:    add sp, sp, a0
 ; ZVFH-NEXT:    .cfi_def_cfa sp, 16
 ; ZVFH-NEXT:    addi sp, sp, 16
@@ -502,26 +478,27 @@ define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 40
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv8r.v v0, v16
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmv8r.v v16, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v0
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a1, 3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
@@ -537,12 +514,7 @@ define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v0
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 5
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
@@ -553,43 +525,27 @@ define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 5
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v0
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 5
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v20
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    li a1, 24
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v4
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v0, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 5
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v24
-; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 40
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
@@ -615,11 +571,11 @@ define <vscale x 1 x half> @vfmadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vd = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc)
   ret <vscale x 1 x half> %vd
@@ -685,11 +641,11 @@ define <vscale x 2 x half> @vfmadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vd = call <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vc, <vscale x 2 x half> %vb)
   ret <vscale x 2 x half> %vd
@@ -707,11 +663,11 @@ define <vscale x 2 x half> @vfmadd_vf_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vf v11, fa5, v10
+; ZVFHMIN-NEXT:    vfmadd.vf v8, fa5, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -780,12 +736,12 @@ define <vscale x 8 x half> @vfmadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v20, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v20, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
 ; ZVFHMIN-NEXT:    ret
   %vd = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %vc, <vscale x 8 x half> %va)
   ret <vscale x 8 x half> %vd
@@ -803,11 +759,11 @@ define <vscale x 8 x half> @vfmadd_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vf v16, fa5, v12
+; ZVFHMIN-NEXT:    vfmadd.vf v8, fa5, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -921,35 +877,35 @@ define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    li a1, 24
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add sp, sp, a0
@@ -973,26 +929,27 @@ define <vscale x 32 x half> @vfmadd_vf_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 40
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv8r.v v0, v16
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmv8r.v v16, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a1, 3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
@@ -1008,12 +965,7 @@ define <vscale x 32 x half> @vfmadd_vf_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v0
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 5
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
@@ -1024,43 +976,27 @@ define <vscale x 32 x half> @vfmadd_vf_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 5
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v24, v0
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 5
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    li a1, 24
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 5
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 40
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
index c06836f129005..9c6cc028eb4a1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
@@ -19,9 +19,9 @@ define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10
+; CHECK-NEXT:    vfmax.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.maxnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
   ret <vscale x 1 x bfloat> %v
@@ -34,9 +34,9 @@ define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vf v9, v9, fa5
+; CHECK-NEXT:    vfmax.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -53,9 +53,9 @@ define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10
+; CHECK-NEXT:    vfmax.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
   ret <vscale x 2 x bfloat> %v
@@ -68,9 +68,9 @@ define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vf v9, v9, fa5
+; CHECK-NEXT:    vfmax.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -87,9 +87,9 @@ define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vv v10, v12, v10
+; CHECK-NEXT:    vfmax.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
   ret <vscale x 4 x bfloat> %v
@@ -102,9 +102,9 @@ define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vf v10, v10, fa5
+; CHECK-NEXT:    vfmax.vf v8, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -121,9 +121,9 @@ define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vv v12, v16, v12
+; CHECK-NEXT:    vfmax.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %v
@@ -136,9 +136,9 @@ define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vf v12, v12, fa5
+; CHECK-NEXT:    vfmax.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -155,9 +155,9 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v16, v24, v16
+; CHECK-NEXT:    vfmax.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
   ret <vscale x 16 x bfloat> %v
@@ -170,9 +170,9 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfl
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vf v16, v16, fa5
+; CHECK-NEXT:    vfmax.vf v8, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -200,9 +200,9 @@ define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vs
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v0, v0, v8
+; CHECK-NEXT:    vfmax.vv v8, v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmax.vv v16, v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -241,9 +241,9 @@ define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfl
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v0, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmax.vv v16, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -276,9 +276,9 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
@@ -297,9 +297,9 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vf(<vscale x 1 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -322,9 +322,9 @@ define <vscale x 2 x half> @vfmax_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %v
@@ -343,9 +343,9 @@ define <vscale x 2 x half> @vfmax_nxv2f16_vf(<vscale x 2 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -368,9 +368,9 @@ define <vscale x 4 x half> @vfmax_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %v
@@ -389,9 +389,9 @@ define <vscale x 4 x half> @vfmax_nxv4f16_vf(<vscale x 4 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v10, v10, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -414,9 +414,9 @@ define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %v
@@ -435,9 +435,9 @@ define <vscale x 8 x half> @vfmax_nxv8f16_vf(<vscale x 8 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -460,9 +460,9 @@ define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfmax.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
   ret <vscale x 16 x half> %v
@@ -481,9 +481,9 @@ define <vscale x 16 x half> @vfmax_nxv16f16_vf(<vscale x 16 x half> %a, half %b)
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vf v16, v16, fa5
+; ZVFHMIN-NEXT:    vfmax.vf v8, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -517,9 +517,9 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vfmax.vv v8, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -564,9 +564,9 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vf(<vscale x 32 x half> %a, half %b)
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmax.vv v16, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll
index 2ed6bf08b5672..c416f641624e3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll
@@ -21,9 +21,9 @@ define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vfmax.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.maxnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -36,9 +36,9 @@ define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10
+; CHECK-NEXT:    vfmax.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.maxnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -53,9 +53,9 @@ define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vfmax.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.maxnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -68,9 +68,9 @@ define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10
+; CHECK-NEXT:    vfmax.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.maxnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -85,9 +85,9 @@ define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vv v10, v12, v10, v0.t
+; CHECK-NEXT:    vfmax.vv v8, v12, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.maxnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -100,9 +100,9 @@ define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vv v10, v12, v10
+; CHECK-NEXT:    vfmax.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.maxnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -117,9 +117,9 @@ define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vv v12, v16, v12, v0.t
+; CHECK-NEXT:    vfmax.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.maxnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -132,9 +132,9 @@ define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vv v12, v16, v12
+; CHECK-NEXT:    vfmax.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.maxnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -149,9 +149,9 @@ define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v16, v24, v16, v0.t
+; CHECK-NEXT:    vfmax.vv v8, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.maxnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -164,9 +164,9 @@ define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v16, v24, v16
+; CHECK-NEXT:    vfmax.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.maxnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -294,9 +294,9 @@ define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.maxnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x half> %v
@@ -315,9 +315,9 @@ define <vscale x 1 x half> @vfmax_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.maxnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x half> %v
@@ -338,9 +338,9 @@ define <vscale x 2 x half> @vfmax_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.maxnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -359,9 +359,9 @@ define <vscale x 2 x half> @vfmax_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.maxnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -382,9 +382,9 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.maxnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x half> %v
@@ -403,9 +403,9 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmax.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.maxnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x half> %v
@@ -426,9 +426,9 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.maxnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -447,9 +447,9 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmax.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.maxnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x half> %v
@@ -470,9 +470,9 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfmax.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.maxnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -491,9 +491,9 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfmax.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.maxnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
index 98ccbf03e1841..596d71b350cf8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
@@ -19,9 +19,9 @@ define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmin.vv v9, v9, v10
+; CHECK-NEXT:    vfmin.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.minnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
   ret <vscale x 1 x bfloat> %v
@@ -34,9 +34,9 @@ define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmin.vf v9, v9, fa5
+; CHECK-NEXT:    vfmin.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -53,9 +53,9 @@ define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmin.vv v9, v9, v10
+; CHECK-NEXT:    vfmin.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
   ret <vscale x 2 x bfloat> %v
@@ -68,9 +68,9 @@ define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmin.vf v9, v9, fa5
+; CHECK-NEXT:    vfmin.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -87,9 +87,9 @@ define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmin.vv v10, v12, v10
+; CHECK-NEXT:    vfmin.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
   ret <vscale x 4 x bfloat> %v
@@ -102,9 +102,9 @@ define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmin.vf v10, v10, fa5
+; CHECK-NEXT:    vfmin.vf v8, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -121,9 +121,9 @@ define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmin.vv v12, v16, v12
+; CHECK-NEXT:    vfmin.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %v
@@ -136,9 +136,9 @@ define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmin.vf v12, v12, fa5
+; CHECK-NEXT:    vfmin.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -155,9 +155,9 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v16, v24, v16
+; CHECK-NEXT:    vfmin.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
   ret <vscale x 16 x bfloat> %v
@@ -170,9 +170,9 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfl
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vf v16, v16, fa5
+; CHECK-NEXT:    vfmin.vf v8, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -200,9 +200,9 @@ define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vs
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v0, v0, v8
+; CHECK-NEXT:    vfmin.vv v8, v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmin.vv v16, v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -241,9 +241,9 @@ define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfl
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v0, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmin.vv v16, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -276,9 +276,9 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
@@ -297,9 +297,9 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vf(<vscale x 1 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfmin.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -322,9 +322,9 @@ define <vscale x 2 x half> @vfmin_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %v
@@ -343,9 +343,9 @@ define <vscale x 2 x half> @vfmin_nxv2f16_vf(<vscale x 2 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfmin.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -368,9 +368,9 @@ define <vscale x 4 x half> @vfmin_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %v
@@ -389,9 +389,9 @@ define <vscale x 4 x half> @vfmin_nxv4f16_vf(<vscale x 4 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vf v10, v10, fa5
+; ZVFHMIN-NEXT:    vfmin.vf v8, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -414,9 +414,9 @@ define <vscale x 8 x half> @vfmin_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %v
@@ -435,9 +435,9 @@ define <vscale x 8 x half> @vfmin_nxv8f16_vf(<vscale x 8 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfmin.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -460,9 +460,9 @@ define <vscale x 16 x half> @vfmin_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfmin.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
   ret <vscale x 16 x half> %v
@@ -481,9 +481,9 @@ define <vscale x 16 x half> @vfmin_nxv16f16_vf(<vscale x 16 x half> %a, half %b)
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vf v16, v16, fa5
+; ZVFHMIN-NEXT:    vfmin.vf v8, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -517,9 +517,9 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vfmin.vv v8, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -564,9 +564,9 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vf(<vscale x 32 x half> %a, half %b)
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmin.vv v16, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll
index 42e8de1b56c55..559860b922d85 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll
@@ -21,9 +21,9 @@ define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmin.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vfmin.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.minnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -36,9 +36,9 @@ define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmin.vv v9, v9, v10
+; CHECK-NEXT:    vfmin.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.minnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -53,9 +53,9 @@ define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmin.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vfmin.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.minnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -68,9 +68,9 @@ define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmin.vv v9, v9, v10
+; CHECK-NEXT:    vfmin.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.minnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -85,9 +85,9 @@ define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmin.vv v10, v12, v10, v0.t
+; CHECK-NEXT:    vfmin.vv v8, v12, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.minnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -100,9 +100,9 @@ define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmin.vv v10, v12, v10
+; CHECK-NEXT:    vfmin.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.minnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -117,9 +117,9 @@ define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmin.vv v12, v16, v12, v0.t
+; CHECK-NEXT:    vfmin.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.minnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -132,9 +132,9 @@ define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmin.vv v12, v16, v12
+; CHECK-NEXT:    vfmin.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.minnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -149,9 +149,9 @@ define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v16, v24, v16, v0.t
+; CHECK-NEXT:    vfmin.vv v8, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.minnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -164,9 +164,9 @@ define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v16, v24, v16
+; CHECK-NEXT:    vfmin.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.minnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -294,9 +294,9 @@ define <vscale x 1 x half> @vfmin_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.minnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x half> %v
@@ -315,9 +315,9 @@ define <vscale x 1 x half> @vfmin_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.minnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x half> %v
@@ -338,9 +338,9 @@ define <vscale x 2 x half> @vfmin_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.minnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -359,9 +359,9 @@ define <vscale x 2 x half> @vfmin_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.minnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -382,9 +382,9 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.minnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x half> %v
@@ -403,9 +403,9 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmin.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.minnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x half> %v
@@ -426,9 +426,9 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.minnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -447,9 +447,9 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmin.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.minnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x half> %v
@@ -470,9 +470,9 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfmin.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.minnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -491,9 +491,9 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfmin.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.minnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
index 624de307f5d4f..3345382632f67 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
@@ -54,11 +54,11 @@ define <vscale x 1 x half> @vfmsub_vf_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v9
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -110,11 +110,11 @@ define <vscale x 2 x half> @vfmsub_vf_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v9
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -139,11 +139,11 @@ define <vscale x 4 x half> @vfmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 4 x half> %vc
   %vd = call <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x half> %va, <vscale x 4 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -161,12 +161,12 @@ define <vscale x 4 x half> @vfmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a0
+; ZVFHMIN-NEXT:    vmv.v.x v13, a0
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vxor.vx v12, v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -195,11 +195,11 @@ define <vscale x 8 x half> @vfmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 8 x half> %va
   %vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %vc, <vscale x 8 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -217,12 +217,12 @@ define <vscale x 8 x half> @vfmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a0
+; ZVFHMIN-NEXT:    vmv.v.x v18, a0
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vxor.vx v16, v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vxor.vx v10, v8, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v16, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
@@ -249,8 +249,8 @@ define <vscale x 16 x half> @vfmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vxor.vx v20, v12, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v8
@@ -273,12 +273,12 @@ define <vscale x 16 x half> @vfmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a0
+; ZVFHMIN-NEXT:    vmv.v.x v28, a0
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vxor.vx v24, v12, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -327,42 +327,38 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vxor.vx v0, v8, a0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
+; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    li a1, 24
 ; ZVFHMIN-NEXT:    mul a0, a0, a1
@@ -411,47 +407,37 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vxor.vx v0, v8, a0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    li a1, 24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll
index 990d3d4e227df..851cc08a7f320 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll
@@ -19,9 +19,9 @@ define <vscale x 1 x bfloat> @vfmul_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmul.vv v9, v9, v10
+; CHECK-NEXT:    vfmul.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fmul.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -37,9 +37,9 @@ define <vscale x 1 x bfloat> @vfmul_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmul.vv v9, v10, v8
+; CHECK-NEXT:    vfmul.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -54,9 +54,9 @@ define <vscale x 2 x bfloat> @vfmul_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmul.vv v9, v9, v10
+; CHECK-NEXT:    vfmul.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fmul.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -72,9 +72,9 @@ define <vscale x 2 x bfloat> @vfmul_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmul.vv v9, v10, v8
+; CHECK-NEXT:    vfmul.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -89,9 +89,9 @@ define <vscale x 4 x bfloat> @vfmul_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmul.vv v10, v12, v10
+; CHECK-NEXT:    vfmul.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fmul.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -103,13 +103,13 @@ define <vscale x 4 x bfloat> @vfmul_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmul.vv v10, v10, v8
+; CHECK-NEXT:    vfmul.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -124,9 +124,9 @@ define <vscale x 8 x bfloat> @vfmul_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmul.vv v12, v16, v12
+; CHECK-NEXT:    vfmul.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fmul.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -138,13 +138,13 @@ define <vscale x 8 x bfloat> @vfmul_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmul.vv v12, v12, v8
+; CHECK-NEXT:    vfmul.vv v8, v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -159,9 +159,9 @@ define <vscale x 16 x bfloat> @vfmul_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmul.vv v16, v24, v16
+; CHECK-NEXT:    vfmul.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fmul.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -173,13 +173,13 @@ define <vscale x 16 x bfloat> @vfmul_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v24, a0
+; CHECK-NEXT:    vmv.v.x v12, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmul.vv v16, v16, v8
+; CHECK-NEXT:    vfmul.vv v8, v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -205,9 +205,9 @@ define <vscale x 32 x bfloat> @vfmul_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmul.vv v0, v0, v8
+; CHECK-NEXT:    vfmul.vv v8, v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmul.vv v16, v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -247,9 +247,9 @@ define <vscale x 32 x bfloat> @vfmul_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmul.vv v0, v8, v0
+; CHECK-NEXT:    vfmul.vv v8, v8, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmul.vv v16, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -281,9 +281,9 @@ define <vscale x 1 x half> @vfmul_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 1 x half> @llvm.experimental.constrained.fmul.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -305,9 +305,9 @@ define <vscale x 1 x half> @vfmul_vf_nxv1f16(<vscale x 1 x half> %va, half %b) s
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -329,9 +329,9 @@ define <vscale x 2 x half> @vfmul_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 2 x half> @llvm.experimental.constrained.fmul.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -353,9 +353,9 @@ define <vscale x 2 x half> @vfmul_vf_nxv2f16(<vscale x 2 x half> %va, half %b) s
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -377,9 +377,9 @@ define <vscale x 4 x half> @vfmul_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 4 x half> @llvm.experimental.constrained.fmul.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -397,13 +397,13 @@ define <vscale x 4 x half> @vfmul_vf_nxv4f16(<vscale x 4 x half> %va, half %b) s
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -425,9 +425,9 @@ define <vscale x 8 x half> @vfmul_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmul.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 8 x half> @llvm.experimental.constrained.fmul.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -445,13 +445,13 @@ define <vscale x 8 x half> @vfmul_vf_nxv8f16(<vscale x 8 x half> %va, half %b) s
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vmv.v.x v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -473,9 +473,9 @@ define <vscale x 16 x half> @vfmul_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfmul.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 16 x half> @llvm.experimental.constrained.fmul.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -493,13 +493,13 @@ define <vscale x 16 x half> @vfmul_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a0
+; ZVFHMIN-NEXT:    vmv.v.x v12, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -532,9 +532,9 @@ define <vscale x 32 x half> @vfmul_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v16, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -580,9 +580,9 @@ define <vscale x 32 x half> @vfmul_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vfmul.vv v8, v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v16, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
index f9373400295df..20c9cc98a858b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
@@ -19,9 +19,9 @@ define <vscale x 1 x bfloat> @vfmul_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmul.vv v9, v9, v10
+; CHECK-NEXT:    vfmul.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fmul <vscale x 1 x bfloat> %va, %vb
   ret <vscale x 1 x bfloat> %vc
@@ -34,9 +34,9 @@ define <vscale x 1 x bfloat> @vfmul_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmul.vf v9, v9, fa5
+; CHECK-NEXT:    vfmul.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -51,9 +51,9 @@ define <vscale x 2 x bfloat> @vfmul_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmul.vv v9, v9, v10
+; CHECK-NEXT:    vfmul.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fmul <vscale x 2 x bfloat> %va, %vb
   ret <vscale x 2 x bfloat> %vc
@@ -66,9 +66,9 @@ define <vscale x 2 x bfloat> @vfmul_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmul.vf v9, v9, fa5
+; CHECK-NEXT:    vfmul.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -83,9 +83,9 @@ define <vscale x 4 x bfloat> @vfmul_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmul.vv v10, v12, v10
+; CHECK-NEXT:    vfmul.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fmul <vscale x 4 x bfloat> %va, %vb
   ret <vscale x 4 x bfloat> %vc
@@ -98,9 +98,9 @@ define <vscale x 4 x bfloat> @vfmul_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmul.vf v10, v10, fa5
+; CHECK-NEXT:    vfmul.vf v8, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -115,9 +115,9 @@ define <vscale x 8 x bfloat> @vfmul_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmul.vv v12, v16, v12
+; CHECK-NEXT:    vfmul.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fmul <vscale x 8 x bfloat> %va, %vb
   ret <vscale x 8 x bfloat> %vc
@@ -130,9 +130,9 @@ define <vscale x 8 x bfloat> @vfmul_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmul.vf v12, v12, fa5
+; CHECK-NEXT:    vfmul.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -147,9 +147,9 @@ define <vscale x 8 x bfloat> @vfmul_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmul.vf v12, v12, fa5
+; CHECK-NEXT:    vfmul.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -164,9 +164,9 @@ define <vscale x 16 x bfloat> @vfmul_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmul.vv v16, v24, v16
+; CHECK-NEXT:    vfmul.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fmul <vscale x 16 x bfloat> %va, %vb
   ret <vscale x 16 x bfloat> %vc
@@ -179,9 +179,9 @@ define <vscale x 16 x bfloat> @vfmul_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmul.vf v16, v16, fa5
+; CHECK-NEXT:    vfmul.vf v8, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -207,9 +207,9 @@ define <vscale x 32 x bfloat> @vfmul_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmul.vv v0, v0, v8
+; CHECK-NEXT:    vfmul.vv v8, v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmul.vv v16, v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -248,9 +248,9 @@ define <vscale x 32 x bfloat> @vfmul_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmul.vv v0, v8, v0
+; CHECK-NEXT:    vfmul.vv v8, v8, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmul.vv v16, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -281,9 +281,9 @@ define <vscale x 1 x half> @vfmul_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fmul <vscale x 1 x half> %va, %vb
   ret <vscale x 1 x half> %vc
@@ -302,9 +302,9 @@ define <vscale x 1 x half> @vfmul_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfmul.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -325,9 +325,9 @@ define <vscale x 2 x half> @vfmul_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fmul <vscale x 2 x half> %va, %vb
   ret <vscale x 2 x half> %vc
@@ -346,9 +346,9 @@ define <vscale x 2 x half> @vfmul_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfmul.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -369,9 +369,9 @@ define <vscale x 4 x half> @vfmul_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fmul <vscale x 4 x half> %va, %vb
   ret <vscale x 4 x half> %vc
@@ -390,9 +390,9 @@ define <vscale x 4 x half> @vfmul_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vf v10, v10, fa5
+; ZVFHMIN-NEXT:    vfmul.vf v8, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -413,9 +413,9 @@ define <vscale x 8 x half> @vfmul_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmul.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fmul <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x half> %vc
@@ -434,9 +434,9 @@ define <vscale x 8 x half> @vfmul_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfmul.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -457,9 +457,9 @@ define <vscale x 8 x half> @vfmul_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfmul.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -480,9 +480,9 @@ define <vscale x 16 x half> @vfmul_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfmul.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fmul <vscale x 16 x half> %va, %vb
   ret <vscale x 16 x half> %vc
@@ -501,9 +501,9 @@ define <vscale x 16 x half> @vfmul_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vf v16, v16, fa5
+; ZVFHMIN-NEXT:    vfmul.vf v8, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -535,9 +535,9 @@ define <vscale x 32 x half> @vfmul_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v16, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -582,9 +582,9 @@ define <vscale x 32 x half> @vfmul_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vfmul.vv v8, v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v16, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll
index 5c8e499d2f5e1..84f27c3b63674 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll
@@ -23,9 +23,9 @@ define <vscale x 1 x half> @vfmul_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x half> %v
@@ -44,9 +44,9 @@ define <vscale x 1 x half> @vfmul_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x half> %v
@@ -67,9 +67,9 @@ define <vscale x 1 x half> @vfmul_vf_nxv1f16(<vscale x 1 x half> %va, half %b, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -92,9 +92,9 @@ define <vscale x 1 x half> @vfmul_vf_nxv1f16_unmasked(<vscale x 1 x half> %va, h
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -117,9 +117,9 @@ define <vscale x 2 x half> @vfmul_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -138,9 +138,9 @@ define <vscale x 2 x half> @vfmul_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfmul.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -161,9 +161,9 @@ define <vscale x 2 x half> @vfmul_vf_nxv2f16(<vscale x 2 x half> %va, half %b, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -186,9 +186,9 @@ define <vscale x 2 x half> @vfmul_vf_nxv2f16_unmasked(<vscale x 2 x half> %va, h
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -211,9 +211,9 @@ define <vscale x 4 x half> @vfmul_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x half> %v
@@ -232,9 +232,9 @@ define <vscale x 4 x half> @vfmul_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x half> %v
@@ -251,13 +251,13 @@ define <vscale x 4 x half> @vfmul_vf_nxv4f16(<vscale x 4 x half> %va, half %b, <
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v10, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -276,13 +276,13 @@ define <vscale x 4 x half> @vfmul_vf_nxv4f16_unmasked(<vscale x 4 x half> %va, h
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -305,9 +305,9 @@ define <vscale x 8 x half> @vfmul_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -326,9 +326,9 @@ define <vscale x 8 x half> @vfmul_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmul.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x half> %v
@@ -345,13 +345,13 @@ define <vscale x 8 x half> @vfmul_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v12, v8, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -370,13 +370,13 @@ define <vscale x 8 x half> @vfmul_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, h
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -399,9 +399,9 @@ define <vscale x 16 x half> @vfmul_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -420,9 +420,9 @@ define <vscale x 16 x half> @vfmul_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfmul.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x half> %v
@@ -439,13 +439,13 @@ define <vscale x 16 x half> @vfmul_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v16, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfmul.vv v8, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -464,13 +464,13 @@ define <vscale x 16 x half> @vfmul_vf_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vfmul.vv v8, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -617,7 +617,7 @@ define <vscale x 32 x half> @vfmul_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -630,9 +630,9 @@ define <vscale x 32 x half> @vfmul_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    slli a3, a3, 3
 ; ZVFHMIN-NEXT:    add a3, sp, a3
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v16, v24, v16, v0.t
@@ -690,20 +690,20 @@ define <vscale x 32 x half> @vfmul_vf_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v24
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmset.m v16
+; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v24, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
 ; ZVFHMIN-NEXT:    addi a3, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v16, v16, v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f.ll
index 83250a0f90eaf..8398a17ecaeb3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f.ll
@@ -17,9 +17,8 @@ define <vscale x 1 x half> @intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32(<vscale x 1 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32(
@@ -64,9 +63,8 @@ define <vscale x 2 x half> @intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32(<vscale x 2 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.f.f.w.nxv2f16.nxv2f32(
@@ -111,9 +109,8 @@ define <vscale x 4 x half> @intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32(<vscale x 4 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(
@@ -158,9 +155,8 @@ define <vscale x 8 x half> @intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32(<vscale x 8 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v12, v8
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.f.f.w.nxv8f16.nxv8f32(
@@ -205,9 +201,8 @@ define <vscale x 16 x half> @intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32(<vscale x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v16, v8
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.f.f.w.nxv16f16.nxv16f32(
@@ -252,9 +247,8 @@ define <vscale x 1 x float> @intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64(<vscale x 1
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.f.f.w.nxv1f32.nxv1f64(
@@ -299,9 +293,8 @@ define <vscale x 2 x float> @intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64(<vscale x 2
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.f.f.w.nxv2f32.nxv2f64(
@@ -346,9 +339,8 @@ define <vscale x 4 x float> @intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64(<vscale x 4
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v12, v8
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.f.f.w.nxv4f32.nxv4f64(
@@ -393,9 +385,8 @@ define <vscale x 8 x float> @intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64(<vscale x 8
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v16, v8
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.f.f.w.nxv8f32.nxv8f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x.ll
index 81b684978bafc..0b97654ec82b7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x.ll
@@ -14,9 +14,8 @@ define <vscale x 1 x half> @intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32(<vscale x 1 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32(
@@ -61,9 +60,8 @@ define <vscale x 2 x half> @intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32(<vscale x 2 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.f.x.w.nxv2f16.nxv2i32(
@@ -108,9 +106,8 @@ define <vscale x 4 x half> @intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32(<vscale x 4 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.x.w.nxv4f16.nxv4i32(
@@ -155,9 +152,8 @@ define <vscale x 8 x half> @intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32(<vscale x 8 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.f.x.w.nxv8f16.nxv8i32(
@@ -202,9 +198,8 @@ define <vscale x 16 x half> @intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32(<vscale x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v16, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.f.x.w.nxv16f16.nxv16i32(
@@ -249,9 +244,8 @@ define <vscale x 1 x float> @intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64(<vscale x 1
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.f.x.w.nxv1f32.nxv1i64(
@@ -296,9 +290,8 @@ define <vscale x 2 x float> @intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64(<vscale x 2
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.f.x.w.nxv2f32.nxv2i64(
@@ -343,9 +336,8 @@ define <vscale x 4 x float> @intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64(<vscale x 4
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.f.x.w.nxv4f32.nxv4i64(
@@ -390,9 +382,8 @@ define <vscale x 8 x float> @intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64(<vscale x 8
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v16, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.f.x.w.nxv8f32.nxv8i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu.ll
index 697c062c7a71f..f9c43461b6663 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu.ll
@@ -14,9 +14,8 @@ define <vscale x 1 x half> @intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32(<vscale x 1
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32(
@@ -61,9 +60,8 @@ define <vscale x 2 x half> @intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32(<vscale x 2
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.f.xu.w.nxv2f16.nxv2i32(
@@ -108,9 +106,8 @@ define <vscale x 4 x half> @intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32(<vscale x 4
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.xu.w.nxv4f16.nxv4i32(
@@ -155,9 +152,8 @@ define <vscale x 8 x half> @intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32(<vscale x 8
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.f.xu.w.nxv8f16.nxv8i32(
@@ -202,9 +198,8 @@ define <vscale x 16 x half> @intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32(<vscale x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v16, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.f.xu.w.nxv16f16.nxv16i32(
@@ -249,9 +244,8 @@ define <vscale x 1 x float> @intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64(<vscale x 1
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.f.xu.w.nxv1f32.nxv1i64(
@@ -296,9 +290,8 @@ define <vscale x 2 x float> @intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64(<vscale x 2
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.f.xu.w.nxv2f32.nxv2i64(
@@ -343,9 +336,8 @@ define <vscale x 4 x float> @intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64(<vscale x 4
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.f.xu.w.nxv4f32.nxv4i64(
@@ -390,9 +382,8 @@ define <vscale x 8 x float> @intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64(<vscale x 8
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v16, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.f.xu.w.nxv8f32.nxv8i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f.ll
index c0e5c6991aec2..52356389d384f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f.ll
@@ -13,8 +13,7 @@ define <vscale x 1 x half> @intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32(
@@ -57,8 +56,7 @@ define <vscale x 2 x half> @intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv2f16.nxv2f32(
@@ -101,8 +99,7 @@ define <vscale x 4 x half> @intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv4f16.nxv4f32(
@@ -145,8 +142,7 @@ define <vscale x 8 x half> @intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv8f16.nxv8f32(
@@ -189,8 +185,7 @@ define <vscale x 16 x half> @intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32(<vscal
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv16f16.nxv16f32(
@@ -233,8 +228,7 @@ define <vscale x 1 x float> @intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64(<vscale
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv1f32.nxv1f64(
@@ -277,8 +271,7 @@ define <vscale x 2 x float> @intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64(<vscale
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv2f32.nxv2f64(
@@ -321,8 +314,7 @@ define <vscale x 4 x float> @intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64(<vscale
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv4f32.nxv4f64(
@@ -365,8 +357,7 @@ define <vscale x 8 x float> @intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64(<vscale
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv8f32.nxv8f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f.ll
index 4079e1c055c5e..0dd5dabb9df26 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f.ll
@@ -13,8 +13,7 @@ define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16(
@@ -57,8 +56,7 @@ define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16(<vscale x 2
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2f16(
@@ -101,8 +99,7 @@ define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16(<vscale x 4
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4f16(
@@ -145,8 +142,7 @@ define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16(<vscale x 8
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8f16(
@@ -189,8 +185,7 @@ define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16f16(
@@ -233,8 +228,7 @@ define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32f16(
@@ -277,8 +271,7 @@ define <vscale x 1 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i16.nxv1f32(
@@ -321,8 +314,7 @@ define <vscale x 2 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i16.nxv2f32(
@@ -365,8 +357,7 @@ define <vscale x 4 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i16.nxv4f32(
@@ -409,8 +400,7 @@ define <vscale x 8 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i16.nxv8f32(
@@ -453,8 +443,7 @@ define <vscale x 16 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32(<vscale
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i16.nxv16f32(
@@ -497,8 +486,7 @@ define <vscale x 1 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i32.nxv1f64(
@@ -541,8 +529,7 @@ define <vscale x 2 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i32.nxv2f64(
@@ -585,8 +572,7 @@ define <vscale x 4 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i32.nxv4f64(
@@ -629,8 +615,7 @@ define <vscale x 8 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i32.nxv8f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f.ll
index 9ce9fbfa8f19c..863c49093c3c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f.ll
@@ -13,8 +13,7 @@ define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16(
@@ -57,8 +56,7 @@ define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16(<vscale x 2
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2f16(
@@ -101,8 +99,7 @@ define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16(<vscale x 4
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4f16(
@@ -145,8 +142,7 @@ define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16(<vscale x 8
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8f16(
@@ -189,8 +185,7 @@ define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16(<vscale
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16f16(
@@ -233,8 +228,7 @@ define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16(<vscale
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32f16(
@@ -277,8 +271,7 @@ define <vscale x 1 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i16.nxv1f32(
@@ -321,8 +314,7 @@ define <vscale x 2 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i16.nxv2f32(
@@ -365,8 +357,7 @@ define <vscale x 4 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i16.nxv4f32(
@@ -409,8 +400,7 @@ define <vscale x 8 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i16.nxv8f32(
@@ -453,8 +443,7 @@ define <vscale x 16 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32(<vscal
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i16.nxv16f32(
@@ -497,8 +486,7 @@ define <vscale x 1 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i32.nxv1f64(
@@ -541,8 +529,7 @@ define <vscale x 2 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i32.nxv2f64(
@@ -585,8 +572,7 @@ define <vscale x 4 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i32.nxv4f64(
@@ -629,8 +615,7 @@ define <vscale x 8 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64(<vscale x
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i32.nxv8f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll
index 5831bb33ff90e..58b5676d85119 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll
@@ -14,9 +14,8 @@ define <vscale x 1 x i8> @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16(<vscale x 1 x ha
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16(
@@ -61,9 +60,8 @@ define <vscale x 2 x i8> @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16(<vscale x 2 x ha
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2f16(
@@ -108,9 +106,8 @@ define <vscale x 4 x i8> @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16(<vscale x 4 x ha
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4f16(
@@ -155,9 +152,8 @@ define <vscale x 8 x i8> @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16(<vscale x 8 x ha
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8f16(
@@ -202,9 +198,8 @@ define <vscale x 16 x i8> @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16(<vscale x 16
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16f16(
@@ -249,9 +244,8 @@ define <vscale x 32 x i8> @intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16(<vscale x 32
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v16, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32f16(
@@ -296,9 +290,8 @@ define <vscale x 1 x i16> @intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32(<vscale x 1 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.x.f.w.nxv1i16.nxv1f32(
@@ -343,9 +336,8 @@ define <vscale x 2 x i16> @intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32(<vscale x 2 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.x.f.w.nxv2i16.nxv2f32(
@@ -390,9 +382,8 @@ define <vscale x 4 x i16> @intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32(<vscale x 4 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.x.f.w.nxv4i16.nxv4f32(
@@ -437,9 +428,8 @@ define <vscale x 8 x i16> @intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32(<vscale x 8 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.x.f.w.nxv8i16.nxv8f32(
@@ -484,9 +474,8 @@ define <vscale x 16 x i16> @intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32(<vscale x 1
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v16, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.x.f.w.nxv16i16.nxv16f32(
@@ -531,9 +520,8 @@ define <vscale x 1 x i32> @intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64(<vscale x 1 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.x.f.w.nxv1i32.nxv1f64(
@@ -578,9 +566,8 @@ define <vscale x 2 x i32> @intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64(<vscale x 2 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.x.f.w.nxv2i32.nxv2f64(
@@ -625,9 +612,8 @@ define <vscale x 4 x i32> @intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64(<vscale x 4 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.x.f.w.nxv4i32.nxv4f64(
@@ -672,9 +658,8 @@ define <vscale x 8 x i32> @intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64(<vscale x 8 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.x.f.w v16, v8
+; CHECK-NEXT:    vfncvt.x.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.x.f.w.nxv8i32.nxv8f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll
index 3ef0a95197198..a5f3dc92aaf17 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll
@@ -14,9 +14,8 @@ define <vscale x 1 x i8> @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x h
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16(
@@ -61,9 +60,8 @@ define <vscale x 2 x i8> @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x h
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2f16(
@@ -108,9 +106,8 @@ define <vscale x 4 x i8> @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x h
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4f16(
@@ -155,9 +152,8 @@ define <vscale x 8 x i8> @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x h
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8f16(
@@ -202,9 +198,8 @@ define <vscale x 16 x i8> @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16(<vscale x 16
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16f16(
@@ -249,9 +244,8 @@ define <vscale x 32 x i8> @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16(<vscale x 32
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v16, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32f16(
@@ -296,9 +290,8 @@ define <vscale x 1 x i16> @intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv1i16.nxv1f32(
@@ -343,9 +336,8 @@ define <vscale x 2 x i16> @intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv2i16.nxv2f32(
@@ -390,9 +382,8 @@ define <vscale x 4 x i16> @intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv4i16.nxv4f32(
@@ -437,9 +428,8 @@ define <vscale x 8 x i16> @intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv8i16.nxv8f32(
@@ -484,9 +474,8 @@ define <vscale x 16 x i16> @intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32(<vscale x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v16, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv16i16.nxv16f32(
@@ -531,9 +520,8 @@ define <vscale x 1 x i32> @intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv1i32.nxv1f64(
@@ -578,9 +566,8 @@ define <vscale x 2 x i32> @intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv2i32.nxv2f64(
@@ -625,9 +612,8 @@ define <vscale x 4 x i32> @intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv4i32.nxv4f64(
@@ -672,9 +658,8 @@ define <vscale x 8 x i32> @intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fsrmi a1, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.xu.f.w v16, v8
+; CHECK-NEXT:    vfncvt.xu.f.w v8, v8
 ; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv8i32.nxv8f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvtbf16-f-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvtbf16-f-f.ll
index 3b533f8e7bcb3..bec0e16854be0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvtbf16-f-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvtbf16-f-f.ll
@@ -13,8 +13,7 @@ define <vscale x 1 x bfloat> @intrinsic_vfncvtbf16_f.f.w_nxv1bf16_nxv1f32(<vscal
 ; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv1bf16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv1bf16.nxv1f32(
@@ -56,8 +55,7 @@ define <vscale x 2 x bfloat> @intrinsic_vfncvtbf16_f.f.w_nxv2bf16_nxv2f32(<vscal
 ; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv2bf16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv2bf16.nxv2f32(
@@ -99,8 +97,7 @@ define <vscale x 4 x bfloat> @intrinsic_vfncvtbf16_f.f.w_nxv4bf16_nxv4f32(<vscal
 ; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv4bf16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv4bf16.nxv4f32(
@@ -142,8 +139,7 @@ define <vscale x 8 x bfloat> @intrinsic_vfncvtbf16_f.f.w_nxv8bf16_nxv8f32(<vscal
 ; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv8bf16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv8bf16.nxv8f32(
@@ -185,8 +181,7 @@ define <vscale x 16 x bfloat> @intrinsic_vfncvtbf16_f.f.w_nxv16bf16_nxv16f32(<vs
 ; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv16bf16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv16bf16.nxv16f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
index d774289e3eebb..9cc2bcea9aadf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
@@ -28,11 +28,11 @@ define <vscale x 1 x half> @vfnmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 1 x half> %va
   %neg2 = fneg <vscale x 1 x half> %vc
@@ -57,11 +57,11 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16(<vscale x 1 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -88,11 +88,11 @@ define <vscale x 2 x half> @vfnmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 2 x half> %va
   %neg2 = fneg <vscale x 2 x half> %vb
@@ -117,11 +117,11 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16(<vscale x 2 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -145,8 +145,8 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
-; ZVFHMIN-NEXT:    vxor.vx v12, v10, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v12
+; ZVFHMIN-NEXT:    vxor.vx v11, v10, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -174,9 +174,9 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a0
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
-; ZVFHMIN-NEXT:    vxor.vx v14, v10, a0
+; ZVFHMIN-NEXT:    vxor.vx v13, v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
@@ -205,8 +205,8 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v14, v10, a0
-; ZVFHMIN-NEXT:    vxor.vx v16, v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vxor.vx v10, v8, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v14
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
@@ -234,14 +234,14 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vmv.v.x v12, a0
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT:    vxor.vx v20, v12, a0
+; ZVFHMIN-NEXT:    vxor.vx v18, v12, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -264,10 +264,10 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v4, v16, a0
+; ZVFHMIN-NEXT:    vxor.vx v28, v16, a0
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
@@ -294,9 +294,9 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    vmv.v.x v16, a0
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vxor.vx v12, v12, a0
-; ZVFHMIN-NEXT:    vxor.vx v4, v16, a0
+; ZVFHMIN-NEXT:    vxor.vx v28, v16, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
@@ -326,14 +326,14 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 5
+; ZVFHMIN-NEXT:    li a2, 24
+; ZVFHMIN-NEXT:    mul a1, a1, a2
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv8r.v v0, v16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 24
-; ZVFHMIN-NEXT:    mul a1, a1, a2
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
@@ -341,22 +341,17 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v8, v0, a0
-; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vxor.vx v0, v0, a0
+; ZVFHMIN-NEXT:    addi a1, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v0, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vxor.vx v16, v16, a0
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
@@ -366,41 +361,24 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v4
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v8, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 5
+; ZVFHMIN-NEXT:    li a1, 24
+; ZVFHMIN-NEXT:    mul a0, a0, a1
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
@@ -429,7 +407,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
@@ -440,47 +418,53 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v16, v8, a0
-; ZVFHMIN-NEXT:    vxor.vx v8, v0, a0
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 4
+; ZVFHMIN-NEXT:    add a1, sp, a1
+; ZVFHMIN-NEXT:    addi a1, a1, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vxor.vx v16, v0, a0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmv8r.v v8, v16
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v4
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v16, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v0, v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
index 96c28e4c6e0e2..eeadf8ef0e7a5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
@@ -27,11 +27,11 @@ define <vscale x 1 x half> @vfnmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 1 x half> %va
   %vd = call <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half> %neg, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -54,11 +54,11 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16(<vscale x 1 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -83,11 +83,11 @@ define <vscale x 2 x half> @vfnmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %neg = fneg <vscale x 2 x half> %va
   %vd = call <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half> %neg, <vscale x 2 x half> %vc, <vscale x 2 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -110,11 +110,11 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16(<vscale x 2 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -193,8 +193,8 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vxor.vx v14, v10, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14
+; ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v20, v8, v16
@@ -222,11 +222,11 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vxor.vx v8, v16, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -306,21 +306,17 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    li a2, 24
-; ZVFHMIN-NEXT:    mul a1, a1, a2
+; ZVFHMIN-NEXT:    slli a1, a1, 4
 ; ZVFHMIN-NEXT:    sub sp, sp, a1
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv8r.v v0, v16
 ; ZVFHMIN-NEXT:    csrr a1, vlenb
-; ZVFHMIN-NEXT:    slli a1, a1, 4
-; ZVFHMIN-NEXT:    add a1, sp, a1
-; ZVFHMIN-NEXT:    addi a1, a1, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a1, vlenb
 ; ZVFHMIN-NEXT:    slli a1, a1, 3
 ; ZVFHMIN-NEXT:    add a1, sp, a1
 ; ZVFHMIN-NEXT:    addi a1, a1, 16
+; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
 ; ZVFHMIN-NEXT:    lui a0, 8
@@ -333,31 +329,23 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v24, v8, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
-; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v0
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    li a1, 24
-; ZVFHMIN-NEXT:    mul a0, a0, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
@@ -385,49 +373,59 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vsca
 ; ZVFHMIN-NEXT:    sub sp, sp, a0
 ; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vmv8r.v v0, v8
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vmv.v.x v24, a0
 ; ZVFHMIN-NEXT:    lui a0, 8
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
+; ZVFHMIN-NEXT:    csrr a1, vlenb
+; ZVFHMIN-NEXT:    slli a1, a1, 3
+; ZVFHMIN-NEXT:    add a1, sp, a1
+; ZVFHMIN-NEXT:    addi a1, a1, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v0, v24, a0
+; ZVFHMIN-NEXT:    vxor.vx v8, v24, a0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v0
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
-; ZVFHMIN-NEXT:    addi a0, sp, 16
-; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v0
+; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-constrained-sdnode.ll
index 9c77a6818bcb2..52ecab13fe807 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfpext-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-constrained-sdnode.ll
@@ -46,9 +46,9 @@ define <vscale x 2 x double> @vfpext_nxv2f16_nxv2f64(<vscale x 2 x half> %va) st
 ; CHECK-LABEL: vfpext_nxv2f16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x double> @llvm.experimental.constrained.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %evec
@@ -59,8 +59,8 @@ define <vscale x 4 x float> @vfpext_nxv4f16_nxv4f32(<vscale x 4 x half> %va) str
 ; CHECK-LABEL: vfpext_nxv4f16_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x float> @llvm.experimental.constrained.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x float> %evec
@@ -71,9 +71,9 @@ define <vscale x 4 x double> @vfpext_nxv4f16_nxv4f64(<vscale x 4 x half> %va) st
 ; CHECK-LABEL: vfpext_nxv4f16_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x double> @llvm.experimental.constrained.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %evec
@@ -84,8 +84,8 @@ define <vscale x 8 x float> @vfpext_nxv8f16_nxv8f32(<vscale x 8 x half> %va) str
 ; CHECK-LABEL: vfpext_nxv8f16_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x float> @llvm.experimental.constrained.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x float> %evec
@@ -96,9 +96,9 @@ define <vscale x 8 x double> @vfpext_nxv8f16_nxv8f64(<vscale x 8 x half> %va) st
 ; CHECK-LABEL: vfpext_nxv8f16_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x double> @llvm.experimental.constrained.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %evec
@@ -121,8 +121,8 @@ define <vscale x 2 x double> @vfpext_nxv2f32_nxv2f64(<vscale x 2 x float> %va) s
 ; CHECK-LABEL: vfpext_nxv2f32_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x double> @llvm.experimental.constrained.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %evec
@@ -133,8 +133,8 @@ define <vscale x 4 x double> @vfpext_nxv4f32_nxv4f64(<vscale x 4 x float> %va) s
 ; CHECK-LABEL: vfpext_nxv4f32_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x double> @llvm.experimental.constrained.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %evec
@@ -145,8 +145,8 @@ define <vscale x 8 x double> @vfpext_nxv8f32_nxv8f64(<vscale x 8 x float> %va) s
 ; CHECK-LABEL: vfpext_nxv8f32_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x double> @llvm.experimental.constrained.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %evec
@@ -194,9 +194,9 @@ define <vscale x 2 x double> @vfpext_nxv2bf16_nxv2f64(<vscale x 2 x bfloat> %va)
 ; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x double> @llvm.experimental.constrained.fpext.nxv2f64.nxv2bf16(<vscale x 2 x bfloat> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %evec
@@ -207,8 +207,8 @@ define <vscale x 4 x float> @vfpext_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %va)
 ; CHECK-LABEL: vfpext_nxv4bf16_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x float> @llvm.experimental.constrained.fpext.nxv4f32.nxv4bf16(<vscale x 4 x bfloat> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x float> %evec
@@ -219,9 +219,9 @@ define <vscale x 4 x double> @vfpext_nxv4bf16_nxv4f64(<vscale x 4 x bfloat> %va)
 ; CHECK-LABEL: vfpext_nxv4bf16_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x double> @llvm.experimental.constrained.fpext.nxv4f64.nxv4bf16(<vscale x 4 x bfloat> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %evec
@@ -232,8 +232,8 @@ define <vscale x 8 x float> @vfpext_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %va)
 ; CHECK-LABEL: vfpext_nxv8bf16_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x float> @llvm.experimental.constrained.fpext.nxv8f32.nxv8bf16(<vscale x 8 x bfloat> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x float> %evec
@@ -244,9 +244,9 @@ define <vscale x 8 x double> @vfpext_nxv8bf16_nxv8f64(<vscale x 8 x bfloat> %va)
 ; CHECK-LABEL: vfpext_nxv8bf16_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x double> @llvm.experimental.constrained.fpext.nxv8f64.nxv8bf16(<vscale x 8 x bfloat> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
index 557cc3fea1592..f13f242223c81 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
@@ -50,9 +50,9 @@ define <vscale x 2 x double> @vfpext_nxv2f16_nxv2f64(<vscale x 2 x half> %va) {
 ; CHECK-LABEL: vfpext_nxv2f16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 2 x half> %va to <vscale x 2 x double>
   ret <vscale x 2 x double> %evec
@@ -63,8 +63,8 @@ define <vscale x 4 x float> @vfpext_nxv4f16_nxv4f32(<vscale x 4 x half> %va) {
 ; CHECK-LABEL: vfpext_nxv4f16_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 4 x half> %va to <vscale x 4 x float>
   ret <vscale x 4 x float> %evec
@@ -75,9 +75,9 @@ define <vscale x 4 x double> @vfpext_nxv4f16_nxv4f64(<vscale x 4 x half> %va) {
 ; CHECK-LABEL: vfpext_nxv4f16_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 4 x half> %va to <vscale x 4 x double>
   ret <vscale x 4 x double> %evec
@@ -88,8 +88,8 @@ define <vscale x 8 x float> @vfpext_nxv8f16_nxv8f32(<vscale x 8 x half> %va) {
 ; CHECK-LABEL: vfpext_nxv8f16_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 8 x half> %va to <vscale x 8 x float>
   ret <vscale x 8 x float> %evec
@@ -100,9 +100,9 @@ define <vscale x 8 x double> @vfpext_nxv8f16_nxv8f64(<vscale x 8 x half> %va) {
 ; CHECK-LABEL: vfpext_nxv8f16_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 8 x half> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
@@ -113,8 +113,8 @@ define <vscale x 16 x float> @vfpext_nxv16f16_nxv16f32(<vscale x 16 x half> %va)
 ; CHECK-LABEL: vfpext_nxv16f16_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 16 x half> %va to <vscale x 16 x float>
   ret <vscale x 16 x float> %evec
@@ -137,8 +137,8 @@ define <vscale x 2 x double> @vfpext_nxv2f32_nxv2f64(<vscale x 2 x float> %va) {
 ; CHECK-LABEL: vfpext_nxv2f32_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 2 x float> %va to <vscale x 2 x double>
   ret <vscale x 2 x double> %evec
@@ -149,8 +149,8 @@ define <vscale x 4 x double> @vfpext_nxv4f32_nxv4f64(<vscale x 4 x float> %va) {
 ; CHECK-LABEL: vfpext_nxv4f32_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 4 x float> %va to <vscale x 4 x double>
   ret <vscale x 4 x double> %evec
@@ -161,8 +161,8 @@ define <vscale x 8 x double> @vfpext_nxv8f32_nxv8f64(<vscale x 8 x float> %va) {
 ; CHECK-LABEL: vfpext_nxv8f32_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
@@ -210,9 +210,9 @@ define <vscale x 2 x double> @vfpext_nxv2bf16_nxv2f64(<vscale x 2 x bfloat> %va)
 ; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 2 x bfloat> %va to <vscale x 2 x double>
   ret <vscale x 2 x double> %evec
@@ -223,8 +223,8 @@ define <vscale x 4 x float> @vfpext_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %va)
 ; CHECK-LABEL: vfpext_nxv4bf16_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 4 x bfloat> %va to <vscale x 4 x float>
   ret <vscale x 4 x float> %evec
@@ -235,9 +235,9 @@ define <vscale x 4 x double> @vfpext_nxv4bf16_nxv4f64(<vscale x 4 x bfloat> %va)
 ; CHECK-LABEL: vfpext_nxv4bf16_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 4 x bfloat> %va to <vscale x 4 x double>
   ret <vscale x 4 x double> %evec
@@ -248,8 +248,8 @@ define <vscale x 8 x float> @vfpext_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %va)
 ; CHECK-LABEL: vfpext_nxv8bf16_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 8 x bfloat> %va to <vscale x 8 x float>
   ret <vscale x 8 x float> %evec
@@ -260,9 +260,9 @@ define <vscale x 8 x double> @vfpext_nxv8bf16_nxv8f64(<vscale x 8 x bfloat> %va)
 ; CHECK-LABEL: vfpext_nxv8bf16_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 8 x bfloat> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
@@ -273,8 +273,8 @@ define <vscale x 16 x float> @vfpext_nxv16bf16_nxv16f32(<vscale x 16 x bfloat> %
 ; CHECK-LABEL: vfpext_nxv16bf16_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = fpext <vscale x 16 x bfloat> %va to <vscale x 16 x float>
   ret <vscale x 16 x float> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll
index 137b616d86fcc..2ebdcb4a4f917 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll
@@ -34,9 +34,9 @@ define <vscale x 2 x double> @vfpext_nxv2f16_nxv2f64(<vscale x 2 x half> %a, <vs
 ; CHECK-LABEL: vfpext_nxv2f16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x double> %v
@@ -46,9 +46,9 @@ define <vscale x 2 x double> @vfpext_nxv2f16_nxv2f64_unmasked(<vscale x 2 x half
 ; CHECK-LABEL: vfpext_nxv2f16_nxv2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
   ret <vscale x 2 x double> %v
@@ -60,8 +60,8 @@ define <vscale x 2 x double> @vfpext_nxv2f32_nxv2f64(<vscale x 2 x float> %a, <v
 ; CHECK-LABEL: vfpext_nxv2f32_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x double> %v
@@ -71,8 +71,8 @@ define <vscale x 2 x double> @vfpext_nxv2f32_nxv2f64_unmasked(<vscale x 2 x floa
 ; CHECK-LABEL: vfpext_nxv2f32_nxv2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
   ret <vscale x 2 x double> %v
@@ -84,8 +84,8 @@ define <vscale x 7 x double> @vfpext_nxv7f32_nxv7f64(<vscale x 7 x float> %a, <v
 ; CHECK-LABEL: vfpext_nxv7f32_nxv7f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x double> @llvm.vp.fpext.nxv7f64.nxv7f32(<vscale x 7 x float> %a, <vscale x 7 x i1> %m, i32 %vl)
   ret <vscale x 7 x double> %v
@@ -151,9 +151,9 @@ define <vscale x 2 x double> @vfpext_nxv2bf16_nxv2f64(<vscale x 2 x bfloat> %a,
 ; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x double> %v
@@ -163,9 +163,9 @@ define <vscale x 2 x double> @vfpext_nxv2bf16_nxv2f64_unmasked(<vscale x 2 x bfl
 ; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
   ret <vscale x 2 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-constrained-sdnode.ll
index 9e7f4ede29f54..652421c0aeb43 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-constrained-sdnode.ll
@@ -9,8 +9,8 @@ define <vscale x 1 x i1> @vfptosi_nxv1f16_nxv1i1(<vscale x 1 x half> %va) strict
 ; CHECK-LABEL: vfptosi_nxv1f16_nxv1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i1> @llvm.experimental.constrained.fptosi.nxv1i1.nxv1f16(<vscale x 1 x half> %va, metadata !"fpexcept.strict")
@@ -22,8 +22,8 @@ define <vscale x 1 x i1> @vfptoui_nxv1f16_nxv1i1(<vscale x 1 x half> %va) strict
 ; CHECK-LABEL: vfptoui_nxv1f16_nxv1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i1> @llvm.experimental.constrained.fptoui.nxv1i1.nxv1f16(<vscale x 1 x half> %va, metadata !"fpexcept.strict")
@@ -35,8 +35,7 @@ define <vscale x 1 x i7> @vfptosi_nxv1f16_nxv1i7(<vscale x 1 x half> %va) strict
 ; CHECK-LABEL: vfptosi_nxv1f16_nxv1i7:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i7> @llvm.experimental.constrained.fptosi.nxv1i7.nxv1f16(<vscale x 1 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 1 x i7> %evec
@@ -47,8 +46,7 @@ define <vscale x 1 x i7> @vfptoui_nxv1f16_nxv1i7(<vscale x 1 x half> %va) strict
 ; CHECK-LABEL: vfptoui_nxv1f16_nxv1i7:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i7> @llvm.experimental.constrained.fptoui.nxv1i7.nxv1f16(<vscale x 1 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 1 x i7> %evec
@@ -59,8 +57,7 @@ define <vscale x 1 x i8> @vfptosi_nxv1f16_nxv1i8(<vscale x 1 x half> %va) strict
 ; CHECK-LABEL: vfptosi_nxv1f16_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i8> @llvm.experimental.constrained.fptosi.nxv1i8.nxv1f16(<vscale x 1 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 1 x i8> %evec
@@ -71,8 +68,7 @@ define <vscale x 1 x i8> @vfptoui_nxv1f16_nxv1i8(<vscale x 1 x half> %va) strict
 ; CHECK-LABEL: vfptoui_nxv1f16_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i8> @llvm.experimental.constrained.fptoui.nxv1i8.nxv1f16(<vscale x 1 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 1 x i8> %evec
@@ -155,8 +151,8 @@ define <vscale x 2 x i1> @vfptosi_nxv2f16_nxv2i1(<vscale x 2 x half> %va) strict
 ; CHECK-LABEL: vfptosi_nxv2f16_nxv2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i1> @llvm.experimental.constrained.fptosi.nxv2i1.nxv2f16(<vscale x 2 x half> %va, metadata !"fpexcept.strict")
@@ -168,8 +164,8 @@ define <vscale x 2 x i1> @vfptoui_nxv2f16_nxv2i1(<vscale x 2 x half> %va) strict
 ; CHECK-LABEL: vfptoui_nxv2f16_nxv2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i1> @llvm.experimental.constrained.fptoui.nxv2i1.nxv2f16(<vscale x 2 x half> %va, metadata !"fpexcept.strict")
@@ -181,8 +177,7 @@ define <vscale x 2 x i8> @vfptosi_nxv2f16_nxv2i8(<vscale x 2 x half> %va) strict
 ; CHECK-LABEL: vfptosi_nxv2f16_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i8> @llvm.experimental.constrained.fptosi.nxv2i8.nxv2f16(<vscale x 2 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i8> %evec
@@ -193,8 +188,7 @@ define <vscale x 2 x i8> @vfptoui_nxv2f16_nxv2i8(<vscale x 2 x half> %va) strict
 ; CHECK-LABEL: vfptoui_nxv2f16_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i8> @llvm.experimental.constrained.fptoui.nxv2i8.nxv2f16(<vscale x 2 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i8> %evec
@@ -251,9 +245,9 @@ define <vscale x 2 x i64> @vfptosi_nxv2f16_nxv2i64(<vscale x 2 x half> %va) stri
 ; CHECK-LABEL: vfptosi_nxv2f16_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i64> @llvm.experimental.constrained.fptosi.nxv2i64.nxv2f16(<vscale x 2 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i64> %evec
@@ -264,9 +258,9 @@ define <vscale x 2 x i64> @vfptoui_nxv2f16_nxv2i64(<vscale x 2 x half> %va) stri
 ; CHECK-LABEL: vfptoui_nxv2f16_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i64> @llvm.experimental.constrained.fptoui.nxv2i64.nxv2f16(<vscale x 2 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i64> %evec
@@ -277,8 +271,8 @@ define <vscale x 4 x i1> @vfptosi_nxv4f16_nxv4i1(<vscale x 4 x half> %va) strict
 ; CHECK-LABEL: vfptosi_nxv4f16_nxv4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i1> @llvm.experimental.constrained.fptosi.nxv4i1.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
@@ -290,8 +284,8 @@ define <vscale x 4 x i1> @vfptoui_nxv4f16_nxv4i1(<vscale x 4 x half> %va) strict
 ; CHECK-LABEL: vfptoui_nxv4f16_nxv4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i1> @llvm.experimental.constrained.fptoui.nxv4i1.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
@@ -303,8 +297,7 @@ define <vscale x 4 x i8> @vfptosi_nxv4f16_nxv4i8(<vscale x 4 x half> %va) strict
 ; CHECK-LABEL: vfptosi_nxv4f16_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i8> @llvm.experimental.constrained.fptosi.nxv4i8.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i8> %evec
@@ -315,8 +308,7 @@ define <vscale x 4 x i8> @vfptoui_nxv4f16_nxv4i8(<vscale x 4 x half> %va) strict
 ; CHECK-LABEL: vfptoui_nxv4f16_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i8> @llvm.experimental.constrained.fptoui.nxv4i8.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i8> %evec
@@ -349,8 +341,8 @@ define <vscale x 4 x i32> @vfptosi_nxv4f16_nxv4i32(<vscale x 4 x half> %va) stri
 ; CHECK-LABEL: vfptosi_nxv4f16_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i32> @llvm.experimental.constrained.fptosi.nxv4i32.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i32> %evec
@@ -361,8 +353,8 @@ define <vscale x 4 x i32> @vfptoui_nxv4f16_nxv4i32(<vscale x 4 x half> %va) stri
 ; CHECK-LABEL: vfptoui_nxv4f16_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i32> @llvm.experimental.constrained.fptoui.nxv4i32.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i32> %evec
@@ -373,9 +365,9 @@ define <vscale x 4 x i64> @vfptosi_nxv4f16_nxv4i64(<vscale x 4 x half> %va) stri
 ; CHECK-LABEL: vfptosi_nxv4f16_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i64> @llvm.experimental.constrained.fptosi.nxv4i64.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i64> %evec
@@ -386,9 +378,9 @@ define <vscale x 4 x i64> @vfptoui_nxv4f16_nxv4i64(<vscale x 4 x half> %va) stri
 ; CHECK-LABEL: vfptoui_nxv4f16_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i64> @llvm.experimental.constrained.fptoui.nxv4i64.nxv4f16(<vscale x 4 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i64> %evec
@@ -399,8 +391,8 @@ define <vscale x 8 x i1> @vfptosi_nxv8f16_nxv8i1(<vscale x 8 x half> %va) strict
 ; CHECK-LABEL: vfptosi_nxv8f16_nxv8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i1> @llvm.experimental.constrained.fptosi.nxv8i1.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
@@ -412,8 +404,8 @@ define <vscale x 8 x i1> @vfptoui_nxv8f16_nxv8i1(<vscale x 8 x half> %va) strict
 ; CHECK-LABEL: vfptoui_nxv8f16_nxv8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i1> @llvm.experimental.constrained.fptoui.nxv8i1.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
@@ -425,8 +417,7 @@ define <vscale x 8 x i8> @vfptosi_nxv8f16_nxv8i8(<vscale x 8 x half> %va) strict
 ; CHECK-LABEL: vfptosi_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i8> @llvm.experimental.constrained.fptosi.nxv8i8.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i8> %evec
@@ -437,8 +428,7 @@ define <vscale x 8 x i8> @vfptoui_nxv8f16_nxv8i8(<vscale x 8 x half> %va) strict
 ; CHECK-LABEL: vfptoui_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i8> @llvm.experimental.constrained.fptoui.nxv8i8.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i8> %evec
@@ -471,8 +461,8 @@ define <vscale x 8 x i32> @vfptosi_nxv8f16_nxv8i32(<vscale x 8 x half> %va) stri
 ; CHECK-LABEL: vfptosi_nxv8f16_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i32> @llvm.experimental.constrained.fptosi.nxv8i32.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i32> %evec
@@ -483,8 +473,8 @@ define <vscale x 8 x i32> @vfptoui_nxv8f16_nxv8i32(<vscale x 8 x half> %va) stri
 ; CHECK-LABEL: vfptoui_nxv8f16_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i32> @llvm.experimental.constrained.fptoui.nxv8i32.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i32> %evec
@@ -495,9 +485,9 @@ define <vscale x 8 x i64> @vfptosi_nxv8f16_nxv8i64(<vscale x 8 x half> %va) stri
 ; CHECK-LABEL: vfptosi_nxv8f16_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i64> @llvm.experimental.constrained.fptosi.nxv8i64.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i64> %evec
@@ -508,9 +498,9 @@ define <vscale x 8 x i64> @vfptoui_nxv8f16_nxv8i64(<vscale x 8 x half> %va) stri
 ; CHECK-LABEL: vfptoui_nxv8f16_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i64> @llvm.experimental.constrained.fptoui.nxv8i64.nxv8f16(<vscale x 8 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i64> %evec
@@ -521,8 +511,8 @@ define <vscale x 16 x i1> @vfptosi_nxv16f16_nxv16i1(<vscale x 16 x half> %va) st
 ; CHECK-LABEL: vfptosi_nxv16f16_nxv16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x i1> @llvm.experimental.constrained.fptosi.nxv16i1.nxv16f16(<vscale x 16 x half> %va, metadata !"fpexcept.strict")
@@ -534,8 +524,8 @@ define <vscale x 16 x i1> @vfptoui_nxv16f16_nxv16i1(<vscale x 16 x half> %va) st
 ; CHECK-LABEL: vfptoui_nxv16f16_nxv16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x i1> @llvm.experimental.constrained.fptoui.nxv16i1.nxv16f16(<vscale x 16 x half> %va, metadata !"fpexcept.strict")
@@ -547,8 +537,7 @@ define <vscale x 16 x i8> @vfptosi_nxv16f16_nxv16i8(<vscale x 16 x half> %va) st
 ; CHECK-LABEL: vfptosi_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x i8> @llvm.experimental.constrained.fptosi.nxv16i8.nxv16f16(<vscale x 16 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 16 x i8> %evec
@@ -559,8 +548,7 @@ define <vscale x 16 x i8> @vfptoui_nxv16f16_nxv16i8(<vscale x 16 x half> %va) st
 ; CHECK-LABEL: vfptoui_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x i8> @llvm.experimental.constrained.fptoui.nxv16i8.nxv16f16(<vscale x 16 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 16 x i8> %evec
@@ -593,8 +581,8 @@ define <vscale x 16 x i32> @vfptosi_nxv16f16_nxv16i32(<vscale x 16 x half> %va)
 ; CHECK-LABEL: vfptosi_nxv16f16_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x i32> @llvm.experimental.constrained.fptosi.nxv16i32.nxv16f16(<vscale x 16 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 16 x i32> %evec
@@ -605,8 +593,8 @@ define <vscale x 16 x i32> @vfptoui_nxv16f16_nxv16i32(<vscale x 16 x half> %va)
 ; CHECK-LABEL: vfptoui_nxv16f16_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x i32> @llvm.experimental.constrained.fptoui.nxv16i32.nxv16f16(<vscale x 16 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 16 x i32> %evec
@@ -617,8 +605,8 @@ define <vscale x 32 x i1> @vfptosi_nxv32f16_nxv32i1(<vscale x 32 x half> %va) st
 ; CHECK-LABEL: vfptosi_nxv32f16_nxv32i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vand.vi v8, v16, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 32 x i1> @llvm.experimental.constrained.fptosi.nxv32i1.nxv32f16(<vscale x 32 x half> %va, metadata !"fpexcept.strict")
@@ -630,8 +618,8 @@ define <vscale x 32 x i1> @vfptoui_nxv32f16_nxv32i1(<vscale x 32 x half> %va) st
 ; CHECK-LABEL: vfptoui_nxv32f16_nxv32i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vand.vi v8, v16, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 32 x i1> @llvm.experimental.constrained.fptoui.nxv32i1.nxv32f16(<vscale x 32 x half> %va, metadata !"fpexcept.strict")
@@ -643,8 +631,7 @@ define <vscale x 32 x i8> @vfptosi_nxv32f16_nxv32i8(<vscale x 32 x half> %va) st
 ; CHECK-LABEL: vfptosi_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 32 x i8> @llvm.experimental.constrained.fptosi.nxv32i8.nxv32f16(<vscale x 32 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 32 x i8> %evec
@@ -655,8 +642,7 @@ define <vscale x 32 x i8> @vfptoui_nxv32f16_nxv32i8(<vscale x 32 x half> %va) st
 ; CHECK-LABEL: vfptoui_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 32 x i8> @llvm.experimental.constrained.fptoui.nxv32i8.nxv32f16(<vscale x 32 x half> %va, metadata !"fpexcept.strict")
   ret <vscale x 32 x i8> %evec
@@ -689,8 +675,8 @@ define <vscale x 1 x i1> @vfptosi_nxv1f32_nxv1i1(<vscale x 1 x float> %va) stric
 ; CHECK-LABEL: vfptosi_nxv1f32_nxv1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i1> @llvm.experimental.constrained.fptosi.nxv1i1.nxv1f32(<vscale x 1 x float> %va, metadata !"fpexcept.strict")
@@ -702,8 +688,8 @@ define <vscale x 1 x i1> @vfptoui_nxv1f32_nxv1i1(<vscale x 1 x float> %va) stric
 ; CHECK-LABEL: vfptoui_nxv1f32_nxv1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i1> @llvm.experimental.constrained.fptoui.nxv1i1.nxv1f32(<vscale x 1 x float> %va, metadata !"fpexcept.strict")
@@ -715,9 +701,9 @@ define <vscale x 1 x i8> @vfptosi_nxv1f32_nxv1i8(<vscale x 1 x float> %va) stric
 ; CHECK-LABEL: vfptosi_nxv1f32_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i8> @llvm.experimental.constrained.fptosi.nxv1i8.nxv1f32(<vscale x 1 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 1 x i8> %evec
@@ -728,9 +714,9 @@ define <vscale x 1 x i8> @vfptoui_nxv1f32_nxv1i8(<vscale x 1 x float> %va) stric
 ; CHECK-LABEL: vfptoui_nxv1f32_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i8> @llvm.experimental.constrained.fptoui.nxv1i8.nxv1f32(<vscale x 1 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 1 x i8> %evec
@@ -741,8 +727,7 @@ define <vscale x 1 x i16> @vfptosi_nxv1f32_nxv1i16(<vscale x 1 x float> %va) str
 ; CHECK-LABEL: vfptosi_nxv1f32_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i16> @llvm.experimental.constrained.fptosi.nxv1i16.nxv1f32(<vscale x 1 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 1 x i16> %evec
@@ -753,8 +738,7 @@ define <vscale x 1 x i16> @vfptoui_nxv1f32_nxv1i16(<vscale x 1 x float> %va) str
 ; CHECK-LABEL: vfptoui_nxv1f32_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i16> @llvm.experimental.constrained.fptoui.nxv1i16.nxv1f32(<vscale x 1 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 1 x i16> %evec
@@ -811,8 +795,8 @@ define <vscale x 2 x i1> @vfptosi_nxv2f32_nxv2i1(<vscale x 2 x float> %va) stric
 ; CHECK-LABEL: vfptosi_nxv2f32_nxv2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i1> @llvm.experimental.constrained.fptosi.nxv2i1.nxv2f32(<vscale x 2 x float> %va, metadata !"fpexcept.strict")
@@ -824,8 +808,8 @@ define <vscale x 2 x i1> @vfptoui_nxv2f32_nxv2i1(<vscale x 2 x float> %va) stric
 ; CHECK-LABEL: vfptoui_nxv2f32_nxv2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i1> @llvm.experimental.constrained.fptoui.nxv2i1.nxv2f32(<vscale x 2 x float> %va, metadata !"fpexcept.strict")
@@ -837,9 +821,9 @@ define <vscale x 2 x i8> @vfptosi_nxv2f32_nxv2i8(<vscale x 2 x float> %va) stric
 ; CHECK-LABEL: vfptosi_nxv2f32_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i8> @llvm.experimental.constrained.fptosi.nxv2i8.nxv2f32(<vscale x 2 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i8> %evec
@@ -850,9 +834,9 @@ define <vscale x 2 x i8> @vfptoui_nxv2f32_nxv2i8(<vscale x 2 x float> %va) stric
 ; CHECK-LABEL: vfptoui_nxv2f32_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i8> @llvm.experimental.constrained.fptoui.nxv2i8.nxv2f32(<vscale x 2 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i8> %evec
@@ -863,8 +847,7 @@ define <vscale x 2 x i16> @vfptosi_nxv2f32_nxv2i16(<vscale x 2 x float> %va) str
 ; CHECK-LABEL: vfptosi_nxv2f32_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i16> @llvm.experimental.constrained.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i16> %evec
@@ -875,8 +858,7 @@ define <vscale x 2 x i16> @vfptoui_nxv2f32_nxv2i16(<vscale x 2 x float> %va) str
 ; CHECK-LABEL: vfptoui_nxv2f32_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i16> @llvm.experimental.constrained.fptoui.nxv2i16.nxv2f32(<vscale x 2 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i16> %evec
@@ -909,8 +891,8 @@ define <vscale x 2 x i64> @vfptosi_nxv2f32_nxv2i64(<vscale x 2 x float> %va) str
 ; CHECK-LABEL: vfptosi_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i64> @llvm.experimental.constrained.fptosi.nxv2i64.nxv2f32(<vscale x 2 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i64> %evec
@@ -921,8 +903,8 @@ define <vscale x 2 x i64> @vfptoui_nxv2f32_nxv2i64(<vscale x 2 x float> %va) str
 ; CHECK-LABEL: vfptoui_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i64> @llvm.experimental.constrained.fptoui.nxv2i64.nxv2f32(<vscale x 2 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i64> %evec
@@ -933,8 +915,8 @@ define <vscale x 4 x i1> @vfptosi_nxv4f32_nxv4i1(<vscale x 4 x float> %va) stric
 ; CHECK-LABEL: vfptosi_nxv4f32_nxv4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i1> @llvm.experimental.constrained.fptosi.nxv4i1.nxv4f32(<vscale x 4 x float> %va, metadata !"fpexcept.strict")
@@ -946,8 +928,8 @@ define <vscale x 4 x i1> @vfptoui_nxv4f32_nxv4i1(<vscale x 4 x float> %va) stric
 ; CHECK-LABEL: vfptoui_nxv4f32_nxv4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i1> @llvm.experimental.constrained.fptoui.nxv4i1.nxv4f32(<vscale x 4 x float> %va, metadata !"fpexcept.strict")
@@ -959,9 +941,9 @@ define <vscale x 4 x i8> @vfptosi_nxv4f32_nxv4i8(<vscale x 4 x float> %va) stric
 ; CHECK-LABEL: vfptosi_nxv4f32_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i8> @llvm.experimental.constrained.fptosi.nxv4i8.nxv4f32(<vscale x 4 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i8> %evec
@@ -972,9 +954,9 @@ define <vscale x 4 x i8> @vfptoui_nxv4f32_nxv4i8(<vscale x 4 x float> %va) stric
 ; CHECK-LABEL: vfptoui_nxv4f32_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i8> @llvm.experimental.constrained.fptoui.nxv4i8.nxv4f32(<vscale x 4 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i8> %evec
@@ -985,8 +967,7 @@ define <vscale x 4 x i16> @vfptosi_nxv4f32_nxv4i16(<vscale x 4 x float> %va) str
 ; CHECK-LABEL: vfptosi_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i16> @llvm.experimental.constrained.fptosi.nxv4i16.nxv4f32(<vscale x 4 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i16> %evec
@@ -997,8 +978,7 @@ define <vscale x 4 x i16> @vfptoui_nxv4f32_nxv4i16(<vscale x 4 x float> %va) str
 ; CHECK-LABEL: vfptoui_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i16> @llvm.experimental.constrained.fptoui.nxv4i16.nxv4f32(<vscale x 4 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i16> %evec
@@ -1031,8 +1011,8 @@ define <vscale x 4 x i64> @vfptosi_nxv4f32_nxv4i64(<vscale x 4 x float> %va) str
 ; CHECK-LABEL: vfptosi_nxv4f32_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i64> @llvm.experimental.constrained.fptosi.nxv4i64.nxv4f32(<vscale x 4 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i64> %evec
@@ -1043,8 +1023,8 @@ define <vscale x 4 x i64> @vfptoui_nxv4f32_nxv4i64(<vscale x 4 x float> %va) str
 ; CHECK-LABEL: vfptoui_nxv4f32_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i64> @llvm.experimental.constrained.fptoui.nxv4i64.nxv4f32(<vscale x 4 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i64> %evec
@@ -1055,8 +1035,8 @@ define <vscale x 8 x i1> @vfptosi_nxv8f32_nxv8i1(<vscale x 8 x float> %va) stric
 ; CHECK-LABEL: vfptosi_nxv8f32_nxv8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i1> @llvm.experimental.constrained.fptosi.nxv8i1.nxv8f32(<vscale x 8 x float> %va, metadata !"fpexcept.strict")
@@ -1068,8 +1048,8 @@ define <vscale x 8 x i1> @vfptoui_nxv8f32_nxv8i1(<vscale x 8 x float> %va) stric
 ; CHECK-LABEL: vfptoui_nxv8f32_nxv8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i1> @llvm.experimental.constrained.fptoui.nxv8i1.nxv8f32(<vscale x 8 x float> %va, metadata !"fpexcept.strict")
@@ -1081,9 +1061,9 @@ define <vscale x 8 x i8> @vfptosi_nxv8f32_nxv8i8(<vscale x 8 x float> %va) stric
 ; CHECK-LABEL: vfptosi_nxv8f32_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i8> @llvm.experimental.constrained.fptosi.nxv8i8.nxv8f32(<vscale x 8 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i8> %evec
@@ -1094,9 +1074,9 @@ define <vscale x 8 x i8> @vfptoui_nxv8f32_nxv8i8(<vscale x 8 x float> %va) stric
 ; CHECK-LABEL: vfptoui_nxv8f32_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i8> @llvm.experimental.constrained.fptoui.nxv8i8.nxv8f32(<vscale x 8 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i8> %evec
@@ -1107,8 +1087,7 @@ define <vscale x 8 x i16> @vfptosi_nxv8f32_nxv8i16(<vscale x 8 x float> %va) str
 ; CHECK-LABEL: vfptosi_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i16> @llvm.experimental.constrained.fptosi.nxv8i16.nxv8f32(<vscale x 8 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i16> %evec
@@ -1119,8 +1098,7 @@ define <vscale x 8 x i16> @vfptoui_nxv8f32_nxv8i16(<vscale x 8 x float> %va) str
 ; CHECK-LABEL: vfptoui_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i16> @llvm.experimental.constrained.fptoui.nxv8i16.nxv8f32(<vscale x 8 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i16> %evec
@@ -1153,8 +1131,8 @@ define <vscale x 8 x i64> @vfptosi_nxv8f32_nxv8i64(<vscale x 8 x float> %va) str
 ; CHECK-LABEL: vfptosi_nxv8f32_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i64> @llvm.experimental.constrained.fptosi.nxv8i64.nxv8f32(<vscale x 8 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i64> %evec
@@ -1165,8 +1143,8 @@ define <vscale x 8 x i64> @vfptoui_nxv8f32_nxv8i64(<vscale x 8 x float> %va) str
 ; CHECK-LABEL: vfptoui_nxv8f32_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i64> @llvm.experimental.constrained.fptoui.nxv8i64.nxv8f32(<vscale x 8 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i64> %evec
@@ -1177,8 +1155,8 @@ define <vscale x 16 x i1> @vfptosi_nxv16f32_nxv16i1(<vscale x 16 x float> %va) s
 ; CHECK-LABEL: vfptosi_nxv16f32_nxv16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vand.vi v8, v16, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x i1> @llvm.experimental.constrained.fptosi.nxv16i1.nxv16f32(<vscale x 16 x float> %va, metadata !"fpexcept.strict")
@@ -1190,8 +1168,8 @@ define <vscale x 16 x i1> @vfptoui_nxv16f32_nxv16i1(<vscale x 16 x float> %va) s
 ; CHECK-LABEL: vfptoui_nxv16f32_nxv16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vand.vi v8, v16, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x i1> @llvm.experimental.constrained.fptoui.nxv16i1.nxv16f32(<vscale x 16 x float> %va, metadata !"fpexcept.strict")
@@ -1203,9 +1181,9 @@ define <vscale x 16 x i8> @vfptosi_nxv16f32_nxv16i8(<vscale x 16 x float> %va) s
 ; CHECK-LABEL: vfptosi_nxv16f32_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x i8> @llvm.experimental.constrained.fptosi.nxv16i8.nxv16f32(<vscale x 16 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 16 x i8> %evec
@@ -1216,9 +1194,9 @@ define <vscale x 16 x i8> @vfptoui_nxv16f32_nxv16i8(<vscale x 16 x float> %va) s
 ; CHECK-LABEL: vfptoui_nxv16f32_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x i8> @llvm.experimental.constrained.fptoui.nxv16i8.nxv16f32(<vscale x 16 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 16 x i8> %evec
@@ -1229,8 +1207,7 @@ define <vscale x 16 x i16> @vfptosi_nxv16f32_nxv16i16(<vscale x 16 x float> %va)
 ; CHECK-LABEL: vfptosi_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x i16> @llvm.experimental.constrained.fptosi.nxv16i16.nxv16f32(<vscale x 16 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 16 x i16> %evec
@@ -1241,8 +1218,7 @@ define <vscale x 16 x i16> @vfptoui_nxv16f32_nxv16i16(<vscale x 16 x float> %va)
 ; CHECK-LABEL: vfptoui_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x i16> @llvm.experimental.constrained.fptoui.nxv16i16.nxv16f32(<vscale x 16 x float> %va, metadata !"fpexcept.strict")
   ret <vscale x 16 x i16> %evec
@@ -1275,8 +1251,8 @@ define <vscale x 1 x i1> @vfptosi_nxv1f64_nxv1i1(<vscale x 1 x double> %va) stri
 ; CHECK-LABEL: vfptosi_nxv1f64_nxv1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i1> @llvm.experimental.constrained.fptosi.nxv1i1.nxv1f64(<vscale x 1 x double> %va, metadata !"fpexcept.strict")
@@ -1288,8 +1264,8 @@ define <vscale x 1 x i1> @vfptoui_nxv1f64_nxv1i1(<vscale x 1 x double> %va) stri
 ; CHECK-LABEL: vfptoui_nxv1f64_nxv1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i1> @llvm.experimental.constrained.fptoui.nxv1i1.nxv1f64(<vscale x 1 x double> %va, metadata !"fpexcept.strict")
@@ -1301,9 +1277,9 @@ define <vscale x 1 x i8> @vfptosi_nxv1f64_nxv1i8(<vscale x 1 x double> %va) stri
 ; CHECK-LABEL: vfptosi_nxv1f64_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1316,9 +1292,9 @@ define <vscale x 1 x i8> @vfptoui_nxv1f64_nxv1i8(<vscale x 1 x double> %va) stri
 ; CHECK-LABEL: vfptoui_nxv1f64_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1331,9 +1307,9 @@ define <vscale x 1 x i16> @vfptosi_nxv1f64_nxv1i16(<vscale x 1 x double> %va) st
 ; CHECK-LABEL: vfptosi_nxv1f64_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i16> @llvm.experimental.constrained.fptosi.nxv1i16.nxv1f64(<vscale x 1 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 1 x i16> %evec
@@ -1344,9 +1320,9 @@ define <vscale x 1 x i16> @vfptoui_nxv1f64_nxv1i16(<vscale x 1 x double> %va) st
 ; CHECK-LABEL: vfptoui_nxv1f64_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i16> @llvm.experimental.constrained.fptoui.nxv1i16.nxv1f64(<vscale x 1 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 1 x i16> %evec
@@ -1357,8 +1333,7 @@ define <vscale x 1 x i32> @vfptosi_nxv1f64_nxv1i32(<vscale x 1 x double> %va) st
 ; CHECK-LABEL: vfptosi_nxv1f64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i32> @llvm.experimental.constrained.fptosi.nxv1i32.nxv1f64(<vscale x 1 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 1 x i32> %evec
@@ -1369,8 +1344,7 @@ define <vscale x 1 x i32> @vfptoui_nxv1f64_nxv1i32(<vscale x 1 x double> %va) st
 ; CHECK-LABEL: vfptoui_nxv1f64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x i32> @llvm.experimental.constrained.fptoui.nxv1i32.nxv1f64(<vscale x 1 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 1 x i32> %evec
@@ -1403,8 +1377,8 @@ define <vscale x 2 x i1> @vfptosi_nxv2f64_nxv2i1(<vscale x 2 x double> %va) stri
 ; CHECK-LABEL: vfptosi_nxv2f64_nxv2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i1> @llvm.experimental.constrained.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> %va, metadata !"fpexcept.strict")
@@ -1416,8 +1390,8 @@ define <vscale x 2 x i1> @vfptoui_nxv2f64_nxv2i1(<vscale x 2 x double> %va) stri
 ; CHECK-LABEL: vfptoui_nxv2f64_nxv2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i1> @llvm.experimental.constrained.fptoui.nxv2i1.nxv2f64(<vscale x 2 x double> %va, metadata !"fpexcept.strict")
@@ -1429,9 +1403,9 @@ define <vscale x 2 x i8> @vfptosi_nxv2f64_nxv2i8(<vscale x 2 x double> %va) stri
 ; CHECK-LABEL: vfptosi_nxv2f64_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1444,9 +1418,9 @@ define <vscale x 2 x i8> @vfptoui_nxv2f64_nxv2i8(<vscale x 2 x double> %va) stri
 ; CHECK-LABEL: vfptoui_nxv2f64_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1459,9 +1433,9 @@ define <vscale x 2 x i16> @vfptosi_nxv2f64_nxv2i16(<vscale x 2 x double> %va) st
 ; CHECK-LABEL: vfptosi_nxv2f64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i16> @llvm.experimental.constrained.fptosi.nxv2i16.nxv2f64(<vscale x 2 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i16> %evec
@@ -1472,9 +1446,9 @@ define <vscale x 2 x i16> @vfptoui_nxv2f64_nxv2i16(<vscale x 2 x double> %va) st
 ; CHECK-LABEL: vfptoui_nxv2f64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i16> @llvm.experimental.constrained.fptoui.nxv2i16.nxv2f64(<vscale x 2 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i16> %evec
@@ -1485,8 +1459,7 @@ define <vscale x 2 x i32> @vfptosi_nxv2f64_nxv2i32(<vscale x 2 x double> %va) st
 ; CHECK-LABEL: vfptosi_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i32> @llvm.experimental.constrained.fptosi.nxv2i32.nxv2f64(<vscale x 2 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i32> %evec
@@ -1497,8 +1470,7 @@ define <vscale x 2 x i32> @vfptoui_nxv2f64_nxv2i32(<vscale x 2 x double> %va) st
 ; CHECK-LABEL: vfptoui_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x i32> @llvm.experimental.constrained.fptoui.nxv2i32.nxv2f64(<vscale x 2 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 2 x i32> %evec
@@ -1531,8 +1503,8 @@ define <vscale x 4 x i1> @vfptosi_nxv4f64_nxv4i1(<vscale x 4 x double> %va) stri
 ; CHECK-LABEL: vfptosi_nxv4f64_nxv4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i1> @llvm.experimental.constrained.fptosi.nxv4i1.nxv4f64(<vscale x 4 x double> %va, metadata !"fpexcept.strict")
@@ -1544,8 +1516,8 @@ define <vscale x 4 x i1> @vfptoui_nxv4f64_nxv4i1(<vscale x 4 x double> %va) stri
 ; CHECK-LABEL: vfptoui_nxv4f64_nxv4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i1> @llvm.experimental.constrained.fptoui.nxv4i1.nxv4f64(<vscale x 4 x double> %va, metadata !"fpexcept.strict")
@@ -1557,9 +1529,9 @@ define <vscale x 4 x i8> @vfptosi_nxv4f64_nxv4i8(<vscale x 4 x double> %va) stri
 ; CHECK-LABEL: vfptosi_nxv4f64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1572,9 +1544,9 @@ define <vscale x 4 x i8> @vfptoui_nxv4f64_nxv4i8(<vscale x 4 x double> %va) stri
 ; CHECK-LABEL: vfptoui_nxv4f64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -1587,9 +1559,9 @@ define <vscale x 4 x i16> @vfptosi_nxv4f64_nxv4i16(<vscale x 4 x double> %va) st
 ; CHECK-LABEL: vfptosi_nxv4f64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i16> @llvm.experimental.constrained.fptosi.nxv4i16.nxv4f64(<vscale x 4 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i16> %evec
@@ -1600,9 +1572,9 @@ define <vscale x 4 x i16> @vfptoui_nxv4f64_nxv4i16(<vscale x 4 x double> %va) st
 ; CHECK-LABEL: vfptoui_nxv4f64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i16> @llvm.experimental.constrained.fptoui.nxv4i16.nxv4f64(<vscale x 4 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i16> %evec
@@ -1613,8 +1585,7 @@ define <vscale x 4 x i32> @vfptosi_nxv4f64_nxv4i32(<vscale x 4 x double> %va) st
 ; CHECK-LABEL: vfptosi_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i32> @llvm.experimental.constrained.fptosi.nxv4i32.nxv4f64(<vscale x 4 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i32> %evec
@@ -1625,8 +1596,7 @@ define <vscale x 4 x i32> @vfptoui_nxv4f64_nxv4i32(<vscale x 4 x double> %va) st
 ; CHECK-LABEL: vfptoui_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x i32> @llvm.experimental.constrained.fptoui.nxv4i32.nxv4f64(<vscale x 4 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 4 x i32> %evec
@@ -1659,8 +1629,8 @@ define <vscale x 8 x i1> @vfptosi_nxv8f64_nxv8i1(<vscale x 8 x double> %va) stri
 ; CHECK-LABEL: vfptosi_nxv8f64_nxv8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vand.vi v8, v16, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i1> @llvm.experimental.constrained.fptosi.nxv8i1.nxv8f64(<vscale x 8 x double> %va, metadata !"fpexcept.strict")
@@ -1672,8 +1642,8 @@ define <vscale x 8 x i1> @vfptoui_nxv8f64_nxv8i1(<vscale x 8 x double> %va) stri
 ; CHECK-LABEL: vfptoui_nxv8f64_nxv8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vand.vi v8, v16, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i1> @llvm.experimental.constrained.fptoui.nxv8i1.nxv8f64(<vscale x 8 x double> %va, metadata !"fpexcept.strict")
@@ -1685,11 +1655,11 @@ define <vscale x 8 x i8> @vfptosi_nxv8f64_nxv8i8(<vscale x 8 x double> %va) stri
 ; CHECK-LABEL: vfptosi_nxv8f64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i8> @llvm.experimental.constrained.fptosi.nxv8i8.nxv8f64(<vscale x 8 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i8> %evec
@@ -1700,11 +1670,11 @@ define <vscale x 8 x i8> @vfptoui_nxv8f64_nxv8i8(<vscale x 8 x double> %va) stri
 ; CHECK-LABEL: vfptoui_nxv8f64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i8> @llvm.experimental.constrained.fptoui.nxv8i8.nxv8f64(<vscale x 8 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i8> %evec
@@ -1715,9 +1685,9 @@ define <vscale x 8 x i16> @vfptosi_nxv8f64_nxv8i16(<vscale x 8 x double> %va) st
 ; CHECK-LABEL: vfptosi_nxv8f64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i16> @llvm.experimental.constrained.fptosi.nxv8i16.nxv8f64(<vscale x 8 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i16> %evec
@@ -1728,9 +1698,9 @@ define <vscale x 8 x i16> @vfptoui_nxv8f64_nxv8i16(<vscale x 8 x double> %va) st
 ; CHECK-LABEL: vfptoui_nxv8f64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i16> @llvm.experimental.constrained.fptoui.nxv8i16.nxv8f64(<vscale x 8 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i16> %evec
@@ -1741,8 +1711,7 @@ define <vscale x 8 x i32> @vfptosi_nxv8f64_nxv8i32(<vscale x 8 x double> %va) st
 ; CHECK-LABEL: vfptosi_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i32> @llvm.experimental.constrained.fptosi.nxv8i32.nxv8f64(<vscale x 8 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i32> %evec
@@ -1753,8 +1722,7 @@ define <vscale x 8 x i32> @vfptoui_nxv8f64_nxv8i32(<vscale x 8 x double> %va) st
 ; CHECK-LABEL: vfptoui_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x i32> @llvm.experimental.constrained.fptoui.nxv8i32.nxv8f64(<vscale x 8 x double> %va, metadata !"fpexcept.strict")
   ret <vscale x 8 x i32> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
index 111fa368ac155..57696a18ccb86 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
@@ -262,9 +262,9 @@ define <vscale x 2 x i64> @vfptosi_nxv2bf16_nxv2i64(<vscale x 2 x bfloat> %va) {
 ; CHECK-LABEL: vfptosi_nxv2bf16_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x bfloat> %va to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %evec
@@ -274,9 +274,9 @@ define <vscale x 2 x i64> @vfptoui_nxv2bf16_nxv2i64(<vscale x 2 x bfloat> %va) {
 ; CHECK-LABEL: vfptoui_nxv2bf16_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x bfloat> %va to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %evec
@@ -384,9 +384,9 @@ define <vscale x 4 x i64> @vfptosi_nxv4bf16_nxv4i64(<vscale x 4 x bfloat> %va) {
 ; CHECK-LABEL: vfptosi_nxv4bf16_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 4 x bfloat> %va to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %evec
@@ -396,9 +396,9 @@ define <vscale x 4 x i64> @vfptoui_nxv4bf16_nxv4i64(<vscale x 4 x bfloat> %va) {
 ; CHECK-LABEL: vfptoui_nxv4bf16_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 4 x bfloat> %va to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %evec
@@ -435,9 +435,9 @@ define <vscale x 8 x i8> @vfptosi_nxv8bf16_nxv8i8(<vscale x 8 x bfloat> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v12
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x bfloat> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %evec
@@ -448,9 +448,9 @@ define <vscale x 8 x i8> @vfptoui_nxv8bf16_nxv8i8(<vscale x 8 x bfloat> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v12
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x bfloat> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %evec
@@ -506,9 +506,9 @@ define <vscale x 8 x i64> @vfptosi_nxv8bf16_nxv8i64(<vscale x 8 x bfloat> %va) {
 ; CHECK-LABEL: vfptosi_nxv8bf16_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x bfloat> %va to <vscale x 8 x i64>
   ret <vscale x 8 x i64> %evec
@@ -518,9 +518,9 @@ define <vscale x 8 x i64> @vfptoui_nxv8bf16_nxv8i64(<vscale x 8 x bfloat> %va) {
 ; CHECK-LABEL: vfptoui_nxv8bf16_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x bfloat> %va to <vscale x 8 x i64>
   ret <vscale x 8 x i64> %evec
@@ -557,9 +557,9 @@ define <vscale x 16 x i8> @vfptosi_nxv16bf16_nxv16i8(<vscale x 16 x bfloat> %va)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v16
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 16 x bfloat> %va to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %evec
@@ -570,9 +570,9 @@ define <vscale x 16 x i8> @vfptoui_nxv16bf16_nxv16i8(<vscale x 16 x bfloat> %va)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v16
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 16 x bfloat> %va to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %evec
@@ -636,10 +636,10 @@ define <vscale x 32 x i1> @vfptosi_nxv32bf16_nxv32i1(<vscale x 32 x bfloat> %va)
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v24
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vand.vi v12, v12, 1
-; CHECK-NEXT:    vmsne.vi v16, v8, 0
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
 ; CHECK-NEXT:    vmsne.vi v0, v12, 0
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v16, a0
+; CHECK-NEXT:    vslideup.vx v0, v8, a0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 32 x bfloat> %va to <vscale x 32 x i1>
   ret <vscale x 32 x i1> %evec
@@ -657,10 +657,10 @@ define <vscale x 32 x i1> @vfptoui_nxv32bf16_nxv32i1(<vscale x 32 x bfloat> %va)
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v24
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vand.vi v12, v12, 1
-; CHECK-NEXT:    vmsne.vi v16, v8, 0
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
 ; CHECK-NEXT:    vmsne.vi v0, v12, 0
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v16, a0
+; CHECK-NEXT:    vslideup.vx v0, v8, a0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 32 x bfloat> %va to <vscale x 32 x i1>
   ret <vscale x 32 x i1> %evec
@@ -672,9 +672,9 @@ define <vscale x 32 x i8> @vfptosi_nxv32bf16_nxv32i8(<vscale x 32 x bfloat> %va)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v16
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
@@ -690,9 +690,9 @@ define <vscale x 32 x i8> @vfptoui_nxv32bf16_nxv32i8(<vscale x 32 x bfloat> %va)
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v16
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
@@ -732,8 +732,8 @@ define <vscale x 1 x i1> @vfptosi_nxv1f16_nxv1i1(<vscale x 1 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv1f16_nxv1i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH-NEXT:    vand.vi v8, v9, 1
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -753,8 +753,7 @@ define <vscale x 1 x i7> @vfptosi_nxv1f16_nxv1i7(<vscale x 1 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv1f16_nxv1i7:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv1f16_nxv1i7:
@@ -773,8 +772,7 @@ define <vscale x 1 x i7> @vfptoui_nxv1f16_nxv1i7(<vscale x 1 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv1f16_nxv1i7:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv1f16_nxv1i7:
@@ -793,8 +791,8 @@ define <vscale x 1 x i1> @vfptoui_nxv1f16_nxv1i1(<vscale x 1 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv1f16_nxv1i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; ZVFH-NEXT:    vand.vi v8, v9, 1
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -814,8 +812,7 @@ define <vscale x 1 x i8> @vfptosi_nxv1f16_nxv1i8(<vscale x 1 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv1f16_nxv1i8:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv1f16_nxv1i8:
@@ -834,8 +831,7 @@ define <vscale x 1 x i8> @vfptoui_nxv1f16_nxv1i8(<vscale x 1 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv1f16_nxv1i8:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv1f16_nxv1i8:
@@ -950,8 +946,8 @@ define <vscale x 2 x i1> @vfptosi_nxv2f16_nxv2i1(<vscale x 2 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv2f16_nxv2i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH-NEXT:    vand.vi v8, v9, 1
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -971,8 +967,8 @@ define <vscale x 2 x i1> @vfptoui_nxv2f16_nxv2i1(<vscale x 2 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv2f16_nxv2i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; ZVFH-NEXT:    vand.vi v8, v9, 1
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -992,8 +988,7 @@ define <vscale x 2 x i8> @vfptosi_nxv2f16_nxv2i8(<vscale x 2 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv2f16_nxv2i8:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv2f16_nxv2i8:
@@ -1012,8 +1007,7 @@ define <vscale x 2 x i8> @vfptoui_nxv2f16_nxv2i8(<vscale x 2 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv2f16_nxv2i8:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv2f16_nxv2i8:
@@ -1104,9 +1098,9 @@ define <vscale x 2 x i64> @vfptosi_nxv2f16_nxv2i64(<vscale x 2 x half> %va) {
 ; CHECK-LABEL: vfptosi_nxv2f16_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x half> %va to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %evec
@@ -1116,9 +1110,9 @@ define <vscale x 2 x i64> @vfptoui_nxv2f16_nxv2i64(<vscale x 2 x half> %va) {
 ; CHECK-LABEL: vfptoui_nxv2f16_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x half> %va to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %evec
@@ -1128,8 +1122,8 @@ define <vscale x 4 x i1> @vfptosi_nxv4f16_nxv4i1(<vscale x 4 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv4f16_nxv4i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH-NEXT:    vand.vi v8, v9, 1
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -1149,8 +1143,8 @@ define <vscale x 4 x i1> @vfptoui_nxv4f16_nxv4i1(<vscale x 4 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv4f16_nxv4i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; ZVFH-NEXT:    vand.vi v8, v9, 1
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -1170,8 +1164,7 @@ define <vscale x 4 x i8> @vfptosi_nxv4f16_nxv4i8(<vscale x 4 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv4f16_nxv4i8:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv4f16_nxv4i8:
@@ -1190,8 +1183,7 @@ define <vscale x 4 x i8> @vfptoui_nxv4f16_nxv4i8(<vscale x 4 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv4f16_nxv4i8:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv4f16_nxv4i8:
@@ -1244,8 +1236,8 @@ define <vscale x 4 x i32> @vfptosi_nxv4f16_nxv4i32(<vscale x 4 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv4f16_nxv4i32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v8
-; ZVFH-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; ZVFH-NEXT:    vmv1r.v v9, v8
+; ZVFH-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv4f16_nxv4i32:
@@ -1263,8 +1255,8 @@ define <vscale x 4 x i32> @vfptoui_nxv4f16_nxv4i32(<vscale x 4 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv4f16_nxv4i32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v8
-; ZVFH-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; ZVFH-NEXT:    vmv1r.v v9, v8
+; ZVFH-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv4f16_nxv4i32:
@@ -1282,9 +1274,9 @@ define <vscale x 4 x i64> @vfptosi_nxv4f16_nxv4i64(<vscale x 4 x half> %va) {
 ; CHECK-LABEL: vfptosi_nxv4f16_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 4 x half> %va to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %evec
@@ -1294,9 +1286,9 @@ define <vscale x 4 x i64> @vfptoui_nxv4f16_nxv4i64(<vscale x 4 x half> %va) {
 ; CHECK-LABEL: vfptoui_nxv4f16_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 4 x half> %va to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %evec
@@ -1306,8 +1298,8 @@ define <vscale x 8 x i1> @vfptosi_nxv8f16_nxv8i1(<vscale x 8 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv8f16_nxv8i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; ZVFH-NEXT:    vand.vi v8, v10, 1
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -1327,8 +1319,8 @@ define <vscale x 8 x i1> @vfptoui_nxv8f16_nxv8i1(<vscale x 8 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv8f16_nxv8i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; ZVFH-NEXT:    vand.vi v8, v10, 1
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -1348,17 +1340,16 @@ define <vscale x 8 x i8> @vfptosi_nxv8f16_nxv8i8(<vscale x 8 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv8f16_nxv8i8:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; ZVFH-NEXT:    vmv.v.v v8, v10
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv8f16_nxv8i8:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v10, v12
+; ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; ZVFHMIN-NEXT:    vnsrl.wi v8, v10, 0
+; ZVFHMIN-NEXT:    vnsrl.wi v8, v8, 0
 ; ZVFHMIN-NEXT:    ret
   %evec = fptosi <vscale x 8 x half> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %evec
@@ -1368,17 +1359,16 @@ define <vscale x 8 x i8> @vfptoui_nxv8f16_nxv8i8(<vscale x 8 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv8f16_nxv8i8:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; ZVFH-NEXT:    vmv.v.v v8, v10
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv8f16_nxv8i8:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v10, v12
+; ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; ZVFHMIN-NEXT:    vnsrl.wi v8, v10, 0
+; ZVFHMIN-NEXT:    vnsrl.wi v8, v8, 0
 ; ZVFHMIN-NEXT:    ret
   %evec = fptoui <vscale x 8 x half> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %evec
@@ -1422,8 +1412,8 @@ define <vscale x 8 x i32> @vfptosi_nxv8f16_nxv8i32(<vscale x 8 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv8f16_nxv8i32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv2r.v v12, v8
-; ZVFH-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; ZVFH-NEXT:    vmv2r.v v10, v8
+; ZVFH-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv8f16_nxv8i32:
@@ -1441,8 +1431,8 @@ define <vscale x 8 x i32> @vfptoui_nxv8f16_nxv8i32(<vscale x 8 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv8f16_nxv8i32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vmv2r.v v12, v8
-; ZVFH-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; ZVFH-NEXT:    vmv2r.v v10, v8
+; ZVFH-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv8f16_nxv8i32:
@@ -1460,9 +1450,9 @@ define <vscale x 8 x i64> @vfptosi_nxv8f16_nxv8i64(<vscale x 8 x half> %va) {
 ; CHECK-LABEL: vfptosi_nxv8f16_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x half> %va to <vscale x 8 x i64>
   ret <vscale x 8 x i64> %evec
@@ -1472,9 +1462,9 @@ define <vscale x 8 x i64> @vfptoui_nxv8f16_nxv8i64(<vscale x 8 x half> %va) {
 ; CHECK-LABEL: vfptoui_nxv8f16_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x half> %va to <vscale x 8 x i64>
   ret <vscale x 8 x i64> %evec
@@ -1484,8 +1474,8 @@ define <vscale x 16 x i1> @vfptosi_nxv16f16_nxv16i1(<vscale x 16 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv16f16_nxv16i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; ZVFH-NEXT:    vand.vi v8, v12, 1
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -1505,8 +1495,8 @@ define <vscale x 16 x i1> @vfptoui_nxv16f16_nxv16i1(<vscale x 16 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv16f16_nxv16i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; ZVFH-NEXT:    vand.vi v8, v12, 1
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -1526,17 +1516,16 @@ define <vscale x 16 x i8> @vfptosi_nxv16f16_nxv16i8(<vscale x 16 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv16f16_nxv16i8:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; ZVFH-NEXT:    vmv.v.v v8, v12
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv16f16_nxv16i8:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; ZVFHMIN-NEXT:    vnsrl.wi v8, v12, 0
+; ZVFHMIN-NEXT:    vnsrl.wi v8, v8, 0
 ; ZVFHMIN-NEXT:    ret
   %evec = fptosi <vscale x 16 x half> %va to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %evec
@@ -1546,17 +1535,16 @@ define <vscale x 16 x i8> @vfptoui_nxv16f16_nxv16i8(<vscale x 16 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv16f16_nxv16i8:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; ZVFH-NEXT:    vmv.v.v v8, v12
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv16f16_nxv16i8:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; ZVFHMIN-NEXT:    vnsrl.wi v8, v12, 0
+; ZVFHMIN-NEXT:    vnsrl.wi v8, v8, 0
 ; ZVFHMIN-NEXT:    ret
   %evec = fptoui <vscale x 16 x half> %va to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %evec
@@ -1600,8 +1588,8 @@ define <vscale x 16 x i32> @vfptosi_nxv16f16_nxv16i32(<vscale x 16 x half> %va)
 ; ZVFH-LABEL: vfptosi_nxv16f16_nxv16i32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv4r.v v16, v8
-; ZVFH-NEXT:    vfwcvt.rtz.x.f.v v8, v16
+; ZVFH-NEXT:    vmv4r.v v12, v8
+; ZVFH-NEXT:    vfwcvt.rtz.x.f.v v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv16f16_nxv16i32:
@@ -1619,8 +1607,8 @@ define <vscale x 16 x i32> @vfptoui_nxv16f16_nxv16i32(<vscale x 16 x half> %va)
 ; ZVFH-LABEL: vfptoui_nxv16f16_nxv16i32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vmv4r.v v16, v8
-; ZVFH-NEXT:    vfwcvt.rtz.xu.f.v v8, v16
+; ZVFH-NEXT:    vmv4r.v v12, v8
+; ZVFH-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv16f16_nxv16i32:
@@ -1638,8 +1626,8 @@ define <vscale x 32 x i1> @vfptosi_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv32f16_nxv32i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; ZVFH-NEXT:    vand.vi v8, v16, 1
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -1654,10 +1642,10 @@ define <vscale x 32 x i1> @vfptosi_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
 ; ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v12, v24
 ; ZVFHMIN-NEXT:    vand.vi v8, v8, 1
 ; ZVFHMIN-NEXT:    vand.vi v12, v12, 1
-; ZVFHMIN-NEXT:    vmsne.vi v16, v8, 0
+; ZVFHMIN-NEXT:    vmsne.vi v8, v8, 0
 ; ZVFHMIN-NEXT:    vmsne.vi v0, v12, 0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v0, v16, a0
+; ZVFHMIN-NEXT:    vslideup.vx v0, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %evec = fptosi <vscale x 32 x half> %va to <vscale x 32 x i1>
   ret <vscale x 32 x i1> %evec
@@ -1667,8 +1655,8 @@ define <vscale x 32 x i1> @vfptoui_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv32f16_nxv32i1:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; ZVFH-NEXT:    vand.vi v8, v16, 1
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; ZVFH-NEXT:    vand.vi v8, v8, 1
 ; ZVFH-NEXT:    vmsne.vi v0, v8, 0
 ; ZVFH-NEXT:    ret
 ;
@@ -1683,10 +1671,10 @@ define <vscale x 32 x i1> @vfptoui_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
 ; ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v12, v24
 ; ZVFHMIN-NEXT:    vand.vi v8, v8, 1
 ; ZVFHMIN-NEXT:    vand.vi v12, v12, 1
-; ZVFHMIN-NEXT:    vmsne.vi v16, v8, 0
+; ZVFHMIN-NEXT:    vmsne.vi v8, v8, 0
 ; ZVFHMIN-NEXT:    vmsne.vi v0, v12, 0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslideup.vx v0, v16, a0
+; ZVFHMIN-NEXT:    vslideup.vx v0, v8, a0
 ; ZVFHMIN-NEXT:    ret
   %evec = fptoui <vscale x 32 x half> %va to <vscale x 32 x i1>
   ret <vscale x 32 x i1> %evec
@@ -1696,8 +1684,7 @@ define <vscale x 32 x i8> @vfptosi_nxv32f16_nxv32i8(<vscale x 32 x half> %va) {
 ; ZVFH-LABEL: vfptosi_nxv32f16_nxv32i8:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; ZVFH-NEXT:    vmv.v.v v8, v16
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv32f16_nxv32i8:
@@ -1705,9 +1692,9 @@ define <vscale x 32 x i8> @vfptosi_nxv32f16_nxv32i8(<vscale x 32 x half> %va) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; ZVFHMIN-NEXT:    vnsrl.wi v8, v12, 0
+; ZVFHMIN-NEXT:    vnsrl.wi v8, v8, 0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v12, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
@@ -1721,8 +1708,7 @@ define <vscale x 32 x i8> @vfptoui_nxv32f16_nxv32i8(<vscale x 32 x half> %va) {
 ; ZVFH-LABEL: vfptoui_nxv32f16_nxv32i8:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; ZVFH-NEXT:    vmv.v.v v8, v16
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv32f16_nxv32i8:
@@ -1730,9 +1716,9 @@ define <vscale x 32 x i8> @vfptoui_nxv32f16_nxv32i8(<vscale x 32 x half> %va) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v12, v16
+; ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; ZVFHMIN-NEXT:    vnsrl.wi v8, v12, 0
+; ZVFHMIN-NEXT:    vnsrl.wi v8, v8, 0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v12, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
@@ -1784,8 +1770,8 @@ define <vscale x 1 x i1> @vfptosi_nxv1f32_nxv1i1(<vscale x 1 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv1f32_nxv1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 1 x float> %va to <vscale x 1 x i1>
@@ -1796,8 +1782,8 @@ define <vscale x 1 x i1> @vfptoui_nxv1f32_nxv1i1(<vscale x 1 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv1f32_nxv1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 1 x float> %va to <vscale x 1 x i1>
@@ -1808,9 +1794,9 @@ define <vscale x 1 x i8> @vfptosi_nxv1f32_nxv1i8(<vscale x 1 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv1f32_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 1 x float> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %evec
@@ -1820,9 +1806,9 @@ define <vscale x 1 x i8> @vfptoui_nxv1f32_nxv1i8(<vscale x 1 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv1f32_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 1 x float> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %evec
@@ -1832,8 +1818,7 @@ define <vscale x 1 x i16> @vfptosi_nxv1f32_nxv1i16(<vscale x 1 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv1f32_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 1 x float> %va to <vscale x 1 x i16>
   ret <vscale x 1 x i16> %evec
@@ -1843,8 +1828,7 @@ define <vscale x 1 x i16> @vfptoui_nxv1f32_nxv1i16(<vscale x 1 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv1f32_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 1 x float> %va to <vscale x 1 x i16>
   ret <vscale x 1 x i16> %evec
@@ -1896,8 +1880,8 @@ define <vscale x 2 x i1> @vfptosi_nxv2f32_nxv2i1(<vscale x 2 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv2f32_nxv2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x float> %va to <vscale x 2 x i1>
@@ -1908,8 +1892,8 @@ define <vscale x 2 x i1> @vfptoui_nxv2f32_nxv2i1(<vscale x 2 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv2f32_nxv2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x float> %va to <vscale x 2 x i1>
@@ -1920,9 +1904,9 @@ define <vscale x 2 x i8> @vfptosi_nxv2f32_nxv2i8(<vscale x 2 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv2f32_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x float> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %evec
@@ -1932,9 +1916,9 @@ define <vscale x 2 x i8> @vfptoui_nxv2f32_nxv2i8(<vscale x 2 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv2f32_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x float> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %evec
@@ -1944,8 +1928,7 @@ define <vscale x 2 x i16> @vfptosi_nxv2f32_nxv2i16(<vscale x 2 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv2f32_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x float> %va to <vscale x 2 x i16>
   ret <vscale x 2 x i16> %evec
@@ -1955,8 +1938,7 @@ define <vscale x 2 x i16> @vfptoui_nxv2f32_nxv2i16(<vscale x 2 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv2f32_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x float> %va to <vscale x 2 x i16>
   ret <vscale x 2 x i16> %evec
@@ -1986,8 +1968,8 @@ define <vscale x 2 x i64> @vfptosi_nxv2f32_nxv2i64(<vscale x 2 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x float> %va to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %evec
@@ -1997,8 +1979,8 @@ define <vscale x 2 x i64> @vfptoui_nxv2f32_nxv2i64(<vscale x 2 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x float> %va to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %evec
@@ -2008,8 +1990,8 @@ define <vscale x 4 x i1> @vfptosi_nxv4f32_nxv4i1(<vscale x 4 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv4f32_nxv4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 4 x float> %va to <vscale x 4 x i1>
@@ -2020,8 +2002,8 @@ define <vscale x 4 x i1> @vfptoui_nxv4f32_nxv4i1(<vscale x 4 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv4f32_nxv4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 4 x float> %va to <vscale x 4 x i1>
@@ -2032,9 +2014,9 @@ define <vscale x 4 x i8> @vfptosi_nxv4f32_nxv4i8(<vscale x 4 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv4f32_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 4 x float> %va to <vscale x 4 x i8>
   ret <vscale x 4 x i8> %evec
@@ -2044,9 +2026,9 @@ define <vscale x 4 x i8> @vfptoui_nxv4f32_nxv4i8(<vscale x 4 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv4f32_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 4 x float> %va to <vscale x 4 x i8>
   ret <vscale x 4 x i8> %evec
@@ -2056,8 +2038,7 @@ define <vscale x 4 x i16> @vfptosi_nxv4f32_nxv4i16(<vscale x 4 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 4 x float> %va to <vscale x 4 x i16>
   ret <vscale x 4 x i16> %evec
@@ -2067,8 +2048,7 @@ define <vscale x 4 x i16> @vfptoui_nxv4f32_nxv4i16(<vscale x 4 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 4 x float> %va to <vscale x 4 x i16>
   ret <vscale x 4 x i16> %evec
@@ -2098,8 +2078,8 @@ define <vscale x 4 x i64> @vfptosi_nxv4f32_nxv4i64(<vscale x 4 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv4f32_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 4 x float> %va to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %evec
@@ -2109,8 +2089,8 @@ define <vscale x 4 x i64> @vfptoui_nxv4f32_nxv4i64(<vscale x 4 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv4f32_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 4 x float> %va to <vscale x 4 x i64>
   ret <vscale x 4 x i64> %evec
@@ -2120,8 +2100,8 @@ define <vscale x 8 x i1> @vfptosi_nxv8f32_nxv8i1(<vscale x 8 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv8f32_nxv8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x float> %va to <vscale x 8 x i1>
@@ -2132,8 +2112,8 @@ define <vscale x 8 x i1> @vfptoui_nxv8f32_nxv8i1(<vscale x 8 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv8f32_nxv8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x float> %va to <vscale x 8 x i1>
@@ -2144,9 +2124,9 @@ define <vscale x 8 x i8> @vfptosi_nxv8f32_nxv8i8(<vscale x 8 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv8f32_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x float> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %evec
@@ -2156,9 +2136,9 @@ define <vscale x 8 x i8> @vfptoui_nxv8f32_nxv8i8(<vscale x 8 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv8f32_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x float> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %evec
@@ -2168,8 +2148,7 @@ define <vscale x 8 x i16> @vfptosi_nxv8f32_nxv8i16(<vscale x 8 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x float> %va to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %evec
@@ -2179,8 +2158,7 @@ define <vscale x 8 x i16> @vfptoui_nxv8f32_nxv8i16(<vscale x 8 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x float> %va to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %evec
@@ -2210,8 +2188,8 @@ define <vscale x 8 x i64> @vfptosi_nxv8f32_nxv8i64(<vscale x 8 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv8f32_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x float> %va to <vscale x 8 x i64>
   ret <vscale x 8 x i64> %evec
@@ -2221,8 +2199,8 @@ define <vscale x 8 x i64> @vfptoui_nxv8f32_nxv8i64(<vscale x 8 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv8f32_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x float> %va to <vscale x 8 x i64>
   ret <vscale x 8 x i64> %evec
@@ -2232,8 +2210,8 @@ define <vscale x 16 x i1> @vfptosi_nxv16f32_nxv16i1(<vscale x 16 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv16f32_nxv16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vand.vi v8, v16, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 16 x float> %va to <vscale x 16 x i1>
@@ -2244,8 +2222,8 @@ define <vscale x 16 x i1> @vfptoui_nxv16f32_nxv16i1(<vscale x 16 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv16f32_nxv16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vand.vi v8, v16, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 16 x float> %va to <vscale x 16 x i1>
@@ -2256,9 +2234,9 @@ define <vscale x 16 x i8> @vfptosi_nxv16f32_nxv16i8(<vscale x 16 x float> %va) {
 ; CHECK-LABEL: vfptosi_nxv16f32_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 16 x float> %va to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %evec
@@ -2268,9 +2246,9 @@ define <vscale x 16 x i8> @vfptoui_nxv16f32_nxv16i8(<vscale x 16 x float> %va) {
 ; CHECK-LABEL: vfptoui_nxv16f32_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 16 x float> %va to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %evec
@@ -2280,8 +2258,7 @@ define <vscale x 16 x i16> @vfptosi_nxv16f32_nxv16i16(<vscale x 16 x float> %va)
 ; CHECK-LABEL: vfptosi_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 16 x float> %va to <vscale x 16 x i16>
   ret <vscale x 16 x i16> %evec
@@ -2291,8 +2268,7 @@ define <vscale x 16 x i16> @vfptoui_nxv16f32_nxv16i16(<vscale x 16 x float> %va)
 ; CHECK-LABEL: vfptoui_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 16 x float> %va to <vscale x 16 x i16>
   ret <vscale x 16 x i16> %evec
@@ -2322,8 +2298,8 @@ define <vscale x 1 x i1> @vfptosi_nxv1f64_nxv1i1(<vscale x 1 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv1f64_nxv1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 1 x double> %va to <vscale x 1 x i1>
@@ -2334,8 +2310,8 @@ define <vscale x 1 x i1> @vfptoui_nxv1f64_nxv1i1(<vscale x 1 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv1f64_nxv1i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 1 x double> %va to <vscale x 1 x i1>
@@ -2346,9 +2322,9 @@ define <vscale x 1 x i8> @vfptosi_nxv1f64_nxv1i8(<vscale x 1 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv1f64_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -2360,9 +2336,9 @@ define <vscale x 1 x i8> @vfptoui_nxv1f64_nxv1i8(<vscale x 1 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv1f64_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -2374,9 +2350,9 @@ define <vscale x 1 x i16> @vfptosi_nxv1f64_nxv1i16(<vscale x 1 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv1f64_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 1 x double> %va to <vscale x 1 x i16>
   ret <vscale x 1 x i16> %evec
@@ -2386,9 +2362,9 @@ define <vscale x 1 x i16> @vfptoui_nxv1f64_nxv1i16(<vscale x 1 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv1f64_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 1 x double> %va to <vscale x 1 x i16>
   ret <vscale x 1 x i16> %evec
@@ -2398,8 +2374,7 @@ define <vscale x 1 x i32> @vfptosi_nxv1f64_nxv1i32(<vscale x 1 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv1f64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 1 x double> %va to <vscale x 1 x i32>
   ret <vscale x 1 x i32> %evec
@@ -2409,8 +2384,7 @@ define <vscale x 1 x i32> @vfptoui_nxv1f64_nxv1i32(<vscale x 1 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv1f64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 1 x double> %va to <vscale x 1 x i32>
   ret <vscale x 1 x i32> %evec
@@ -2440,8 +2414,8 @@ define <vscale x 2 x i1> @vfptosi_nxv2f64_nxv2i1(<vscale x 2 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv2f64_nxv2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x double> %va to <vscale x 2 x i1>
@@ -2452,8 +2426,8 @@ define <vscale x 2 x i1> @vfptoui_nxv2f64_nxv2i1(<vscale x 2 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv2f64_nxv2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x double> %va to <vscale x 2 x i1>
@@ -2464,9 +2438,9 @@ define <vscale x 2 x i8> @vfptosi_nxv2f64_nxv2i8(<vscale x 2 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv2f64_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -2478,9 +2452,9 @@ define <vscale x 2 x i8> @vfptoui_nxv2f64_nxv2i8(<vscale x 2 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv2f64_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -2492,9 +2466,9 @@ define <vscale x 2 x i16> @vfptosi_nxv2f64_nxv2i16(<vscale x 2 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv2f64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x double> %va to <vscale x 2 x i16>
   ret <vscale x 2 x i16> %evec
@@ -2504,9 +2478,9 @@ define <vscale x 2 x i16> @vfptoui_nxv2f64_nxv2i16(<vscale x 2 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv2f64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x double> %va to <vscale x 2 x i16>
   ret <vscale x 2 x i16> %evec
@@ -2516,8 +2490,7 @@ define <vscale x 2 x i32> @vfptosi_nxv2f64_nxv2i32(<vscale x 2 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x double> %va to <vscale x 2 x i32>
   ret <vscale x 2 x i32> %evec
@@ -2527,8 +2500,7 @@ define <vscale x 2 x i32> @vfptoui_nxv2f64_nxv2i32(<vscale x 2 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x double> %va to <vscale x 2 x i32>
   ret <vscale x 2 x i32> %evec
@@ -2558,8 +2530,8 @@ define <vscale x 4 x i1> @vfptosi_nxv4f64_nxv4i1(<vscale x 4 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv4f64_nxv4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 4 x double> %va to <vscale x 4 x i1>
@@ -2570,8 +2542,8 @@ define <vscale x 4 x i1> @vfptoui_nxv4f64_nxv4i1(<vscale x 4 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv4f64_nxv4i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 4 x double> %va to <vscale x 4 x i1>
@@ -2582,9 +2554,9 @@ define <vscale x 4 x i8> @vfptosi_nxv4f64_nxv4i8(<vscale x 4 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv4f64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -2596,9 +2568,9 @@ define <vscale x 4 x i8> @vfptoui_nxv4f64_nxv4i8(<vscale x 4 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv4f64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -2610,9 +2582,9 @@ define <vscale x 4 x i16> @vfptosi_nxv4f64_nxv4i16(<vscale x 4 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv4f64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 4 x double> %va to <vscale x 4 x i16>
   ret <vscale x 4 x i16> %evec
@@ -2622,9 +2594,9 @@ define <vscale x 4 x i16> @vfptoui_nxv4f64_nxv4i16(<vscale x 4 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv4f64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 4 x double> %va to <vscale x 4 x i16>
   ret <vscale x 4 x i16> %evec
@@ -2634,8 +2606,7 @@ define <vscale x 4 x i32> @vfptosi_nxv4f64_nxv4i32(<vscale x 4 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 4 x double> %va to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %evec
@@ -2645,8 +2616,7 @@ define <vscale x 4 x i32> @vfptoui_nxv4f64_nxv4i32(<vscale x 4 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 4 x double> %va to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %evec
@@ -2676,8 +2646,8 @@ define <vscale x 8 x i1> @vfptosi_nxv8f64_nxv8i1(<vscale x 8 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv8f64_nxv8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vand.vi v8, v16, 1
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x double> %va to <vscale x 8 x i1>
@@ -2688,8 +2658,8 @@ define <vscale x 8 x i1> @vfptoui_nxv8f64_nxv8i1(<vscale x 8 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv8f64_nxv8i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vand.vi v8, v16, 1
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x double> %va to <vscale x 8 x i1>
@@ -2700,11 +2670,11 @@ define <vscale x 8 x i8> @vfptosi_nxv8f64_nxv8i8(<vscale x 8 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv8f64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x double> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %evec
@@ -2714,11 +2684,11 @@ define <vscale x 8 x i8> @vfptoui_nxv8f64_nxv8i8(<vscale x 8 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv8f64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x double> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %evec
@@ -2728,9 +2698,9 @@ define <vscale x 8 x i16> @vfptosi_nxv8f64_nxv8i16(<vscale x 8 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv8f64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x double> %va to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %evec
@@ -2740,9 +2710,9 @@ define <vscale x 8 x i16> @vfptoui_nxv8f64_nxv8i16(<vscale x 8 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv8f64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x double> %va to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %evec
@@ -2752,8 +2722,7 @@ define <vscale x 8 x i32> @vfptosi_nxv8f64_nxv8i32(<vscale x 8 x double> %va) {
 ; CHECK-LABEL: vfptosi_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x double> %va to <vscale x 8 x i32>
   ret <vscale x 8 x i32> %evec
@@ -2763,8 +2732,7 @@ define <vscale x 8 x i32> @vfptoui_nxv8f64_nxv8i32(<vscale x 8 x double> %va) {
 ; CHECK-LABEL: vfptoui_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x double> %va to <vscale x 8 x i32>
   ret <vscale x 8 x i32> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
index 33decd8aa1b91..db9a47eb4059a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
@@ -102,9 +102,8 @@ define <vscale x 2 x i1> @vfptosi_nxv2i1_nxv2f64(<vscale x 2 x double> %va, <vsc
 ; CHECK-LABEL: vfptosi_nxv2i1_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll
index f94f709626443..d4d3cf3b7e485 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll
@@ -93,9 +93,9 @@ define <vscale x 2 x i64> @vfptosi_nxv2i64_nxv2bf16(<vscale x 2 x bfloat> %va, <
 ; CHECK-LABEL: vfptosi_nxv2i64_nxv2bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10, v0.t
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
@@ -105,9 +105,9 @@ define <vscale x 2 x i64> @vfptosi_nxv2i64_nxv2bf16_unmasked(<vscale x 2 x bfloa
 ; CHECK-LABEL: vfptosi_nxv2i64_nxv2bf16_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i64> %v
@@ -119,8 +119,7 @@ define <vscale x 2 x i7> @vfptosi_v4i7_v4f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFH-LABEL: vfptosi_v4i7_v4f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_v4i7_v4f16:
@@ -141,8 +140,7 @@ define <vscale x 2 x i8> @vfptosi_nxv2i8_nxv2f16(<vscale x 2 x half> %va, <vscal
 ; ZVFH-LABEL: vfptosi_nxv2i8_nxv2f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv2i8_nxv2f16:
@@ -161,8 +159,7 @@ define <vscale x 2 x i8> @vfptosi_nxv2i8_nxv2f16_unmasked(<vscale x 2 x half> %v
 ; ZVFH-LABEL: vfptosi_nxv2i8_nxv2f16_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv2i8_nxv2f16_unmasked:
@@ -259,17 +256,17 @@ define <vscale x 2 x i64> @vfptosi_nxv2i64_nxv2f16(<vscale x 2 x half> %va, <vsc
 ; ZVFH-LABEL: vfptosi_nxv2i64_nxv2f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
+; ZVFH-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFH-NEXT:    vfwcvt.rtz.x.f.v v8, v10, v0.t
+; ZVFH-NEXT:    vfwcvt.rtz.x.f.v v8, v9, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptosi_nxv2i64_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.rtz.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.rtz.x.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
@@ -279,9 +276,9 @@ define <vscale x 2 x i64> @vfptosi_nxv2i64_nxv2f16_unmasked(<vscale x 2 x half>
 ; CHECK-LABEL: vfptosi_nxv2i64_nxv2f16_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i64> %v
@@ -293,9 +290,9 @@ define <vscale x 2 x i8> @vfptosi_nxv2i8_nxv2f32(<vscale x 2 x float> %va, <vsca
 ; CHECK-LABEL: vfptosi_nxv2i8_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i8> %v
@@ -305,9 +302,9 @@ define <vscale x 2 x i8> @vfptosi_nxv2i8_nxv2f32_unmasked(<vscale x 2 x float> %
 ; CHECK-LABEL: vfptosi_nxv2i8_nxv2f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i8> %v
@@ -319,8 +316,7 @@ define <vscale x 2 x i16> @vfptosi_nxv2i16_nxv2f32(<vscale x 2 x float> %va, <vs
 ; CHECK-LABEL: vfptosi_nxv2i16_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
@@ -330,8 +326,7 @@ define <vscale x 2 x i16> @vfptosi_nxv2i16_nxv2f32_unmasked(<vscale x 2 x float>
 ; CHECK-LABEL: vfptosi_nxv2i16_nxv2f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i16> %v
@@ -365,8 +360,8 @@ define <vscale x 2 x i64> @vfptosi_nxv2i64_nxv2f32(<vscale x 2 x float> %va, <vs
 ; CHECK-LABEL: vfptosi_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
@@ -376,8 +371,8 @@ define <vscale x 2 x i64> @vfptosi_nxv2i64_nxv2f32_unmasked(<vscale x 2 x float>
 ; CHECK-LABEL: vfptosi_nxv2i64_nxv2f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i64> %v
@@ -389,9 +384,9 @@ define <vscale x 2 x i8> @vfptosi_nxv2i8_nxv2f64(<vscale x 2 x double> %va, <vsc
 ; CHECK-LABEL: vfptosi_nxv2i8_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8, v0.t
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -403,9 +398,9 @@ define <vscale x 2 x i8> @vfptosi_nxv2i8_nxv2f64_unmasked(<vscale x 2 x double>
 ; CHECK-LABEL: vfptosi_nxv2i8_nxv2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -419,9 +414,9 @@ define <vscale x 2 x i16> @vfptosi_nxv2i16_nxv2f64(<vscale x 2 x double> %va, <v
 ; CHECK-LABEL: vfptosi_nxv2i16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8, v0.t
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
@@ -431,9 +426,9 @@ define <vscale x 2 x i16> @vfptosi_nxv2i16_nxv2f64_unmasked(<vscale x 2 x double
 ; CHECK-LABEL: vfptosi_nxv2i16_nxv2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i16> %v
@@ -445,8 +440,7 @@ define <vscale x 2 x i32> @vfptosi_nxv2i32_nxv2f64(<vscale x 2 x double> %va, <v
 ; CHECK-LABEL: vfptosi_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i32> %v
@@ -456,8 +450,7 @@ define <vscale x 2 x i32> @vfptosi_nxv2i32_nxv2f64_unmasked(<vscale x 2 x double
 ; CHECK-LABEL: vfptosi_nxv2i32_nxv2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i32> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
index 59c6791c12f79..703c043bcc632 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
@@ -102,9 +102,8 @@ define <vscale x 2 x i1> @vfptoui_nxv2i1_nxv2f64(<vscale x 2 x double> %va, <vsc
 ; CHECK-LABEL: vfptoui_nxv2i1_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v10, v8, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll
index 7aae383049deb..4b7d9a36858af 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll
@@ -93,9 +93,9 @@ define <vscale x 2 x i64> @vfptoui_nxv2i64_nxv2bf16(<vscale x 2 x bfloat> %va, <
 ; CHECK-LABEL: vfptoui_nxv2i64_nxv2bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10, v0.t
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
@@ -105,9 +105,9 @@ define <vscale x 2 x i64> @vfptoui_nxv2i64_nxv2bf16_unmasked(<vscale x 2 x bfloa
 ; CHECK-LABEL: vfptoui_nxv2i64_nxv2bf16_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i64> %v
@@ -119,8 +119,7 @@ define <vscale x 2 x i7> @vfptoui_v4i7_v4f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFH-LABEL: vfptoui_v4i7_v4f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_v4i7_v4f16:
@@ -141,8 +140,7 @@ define <vscale x 2 x i8> @vfptoui_nxv2i8_nxv2f16(<vscale x 2 x half> %va, <vscal
 ; ZVFH-LABEL: vfptoui_nxv2i8_nxv2f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv2i8_nxv2f16:
@@ -161,8 +159,7 @@ define <vscale x 2 x i8> @vfptoui_nxv2i8_nxv2f16_unmasked(<vscale x 2 x half> %v
 ; ZVFH-LABEL: vfptoui_nxv2i8_nxv2f16_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv2i8_nxv2f16_unmasked:
@@ -259,17 +256,17 @@ define <vscale x 2 x i64> @vfptoui_nxv2i64_nxv2f16(<vscale x 2 x half> %va, <vsc
 ; ZVFH-LABEL: vfptoui_nxv2i64_nxv2f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
+; ZVFH-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFH-NEXT:    vfwcvt.rtz.xu.f.v v8, v10, v0.t
+; ZVFH-NEXT:    vfwcvt.rtz.xu.f.v v8, v9, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vfptoui_nxv2i64_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.rtz.xu.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.rtz.xu.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
@@ -279,9 +276,9 @@ define <vscale x 2 x i64> @vfptoui_nxv2i64_nxv2f16_unmasked(<vscale x 2 x half>
 ; CHECK-LABEL: vfptoui_nxv2i64_nxv2f16_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i64> %v
@@ -293,9 +290,9 @@ define <vscale x 2 x i8> @vfptoui_nxv2i8_nxv2f32(<vscale x 2 x float> %va, <vsca
 ; CHECK-LABEL: vfptoui_nxv2i8_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8, v0.t
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i8> %v
@@ -305,9 +302,9 @@ define <vscale x 2 x i8> @vfptoui_nxv2i8_nxv2f32_unmasked(<vscale x 2 x float> %
 ; CHECK-LABEL: vfptoui_nxv2i8_nxv2f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i8> %v
@@ -319,8 +316,7 @@ define <vscale x 2 x i16> @vfptoui_nxv2i16_nxv2f32(<vscale x 2 x float> %va, <vs
 ; CHECK-LABEL: vfptoui_nxv2i16_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
@@ -330,8 +326,7 @@ define <vscale x 2 x i16> @vfptoui_nxv2i16_nxv2f32_unmasked(<vscale x 2 x float>
 ; CHECK-LABEL: vfptoui_nxv2i16_nxv2f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i16> %v
@@ -365,8 +360,8 @@ define <vscale x 2 x i64> @vfptoui_nxv2i64_nxv2f32(<vscale x 2 x float> %va, <vs
 ; CHECK-LABEL: vfptoui_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i64> %v
@@ -376,8 +371,8 @@ define <vscale x 2 x i64> @vfptoui_nxv2i64_nxv2f32_unmasked(<vscale x 2 x float>
 ; CHECK-LABEL: vfptoui_nxv2i64_nxv2f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i64> %v
@@ -389,9 +384,9 @@ define <vscale x 2 x i8> @vfptoui_nxv2i8_nxv2f64(<vscale x 2 x double> %va, <vsc
 ; CHECK-LABEL: vfptoui_nxv2i8_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8, v0.t
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -403,9 +398,9 @@ define <vscale x 2 x i8> @vfptoui_nxv2i8_nxv2f64_unmasked(<vscale x 2 x double>
 ; CHECK-LABEL: vfptoui_nxv2i8_nxv2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -419,9 +414,9 @@ define <vscale x 2 x i16> @vfptoui_nxv2i16_nxv2f64(<vscale x 2 x double> %va, <v
 ; CHECK-LABEL: vfptoui_nxv2i16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8, v0.t
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
@@ -431,9 +426,9 @@ define <vscale x 2 x i16> @vfptoui_nxv2i16_nxv2f64_unmasked(<vscale x 2 x double
 ; CHECK-LABEL: vfptoui_nxv2i16_nxv2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i16> %v
@@ -445,8 +440,7 @@ define <vscale x 2 x i32> @vfptoui_nxv2i32_nxv2f64(<vscale x 2 x double> %va, <v
 ; CHECK-LABEL: vfptoui_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i32> %v
@@ -456,8 +450,7 @@ define <vscale x 2 x i32> @vfptoui_nxv2i32_nxv2f64_unmasked(<vscale x 2 x double
 ; CHECK-LABEL: vfptoui_nxv2i32_nxv2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i32> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll
index 566920d577ce1..799e78bbcc6b5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll
@@ -13,8 +13,7 @@ define <vscale x 1 x float> @vfptrunc_nxv1f64_nxv1f32(<vscale x 1 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x float> @llvm.experimental.constrained.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x float> %evec
@@ -25,9 +24,9 @@ define <vscale x 1 x half> @vfptrunc_nxv1f64_nxv1f16(<vscale x 1 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv1f64_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x half> @llvm.experimental.constrained.fptrunc.nxv1f16.nxv1f64(<vscale x 1 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x half> %evec
@@ -38,8 +37,7 @@ define <vscale x 1 x half> @vfptrunc_nxv1f32_nxv1f16(<vscale x 1 x float> %va) s
 ; CHECK-LABEL: vfptrunc_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x half> @llvm.experimental.constrained.fptrunc.nxv1f16.nxv1f32(<vscale x 1 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x half> %evec
@@ -50,8 +48,7 @@ define <vscale x 2 x float> @vfptrunc_nxv2f64_nxv2f32(<vscale x 2 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x float> @llvm.experimental.constrained.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x float> %evec
@@ -62,9 +59,9 @@ define <vscale x 2 x half> @vfptrunc_nxv2f64_nxv2f16(<vscale x 2 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv2f64_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x half> @llvm.experimental.constrained.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x half> %evec
@@ -75,8 +72,7 @@ define <vscale x 2 x half> @vfptrunc_nxv2f32_nxv2f16(<vscale x 2 x float> %va) s
 ; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x half> @llvm.experimental.constrained.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x half> %evec
@@ -87,8 +83,7 @@ define <vscale x 4 x float> @vfptrunc_nxv4f64_nxv4f32(<vscale x 4 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x float> @llvm.experimental.constrained.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x float> %evec
@@ -99,9 +94,9 @@ define <vscale x 4 x half> @vfptrunc_nxv4f64_nxv4f16(<vscale x 4 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv4f64_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x half> @llvm.experimental.constrained.fptrunc.nxv4f16.nxv4f64(<vscale x 4 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x half> %evec
@@ -112,8 +107,7 @@ define <vscale x 4 x half> @vfptrunc_nxv4f32_nxv4f16(<vscale x 4 x float> %va) s
 ; CHECK-LABEL: vfptrunc_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x half> @llvm.experimental.constrained.fptrunc.nxv4f16.nxv4f32(<vscale x 4 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x half> %evec
@@ -124,8 +118,7 @@ define <vscale x 8 x float> @vfptrunc_nxv8f64_nxv8f32(<vscale x 8 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x float> @llvm.experimental.constrained.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x float> %evec
@@ -136,9 +129,9 @@ define <vscale x 8 x half> @vfptrunc_nxv8f64_nxv8f16(<vscale x 8 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv8f64_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v16
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x half> @llvm.experimental.constrained.fptrunc.nxv8f16.nxv8f64(<vscale x 8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x half> %evec
@@ -149,8 +142,7 @@ define <vscale x 8 x half> @vfptrunc_nxv8f32_nxv8f16(<vscale x 8 x float> %va) s
 ; CHECK-LABEL: vfptrunc_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x half> @llvm.experimental.constrained.fptrunc.nxv8f16.nxv8f32(<vscale x 8 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x half> %evec
@@ -161,9 +153,9 @@ define <vscale x 1 x bfloat> @vfptrunc_nxv1f64_nxv1bf16(<vscale x 1 x double> %v
 ; CHECK-LABEL: vfptrunc_nxv1f64_nxv1bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fptrunc.nxv1bf16.nxv1f64(<vscale x 1 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x bfloat> %evec
@@ -174,8 +166,7 @@ define <vscale x 1 x bfloat> @vfptrunc_nxv1f32_nxv1bf16(<vscale x 1 x float> %va
 ; CHECK-LABEL: vfptrunc_nxv1f32_nxv1bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fptrunc.nxv1bf16.nxv1f32(<vscale x 1 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x bfloat> %evec
@@ -186,9 +177,9 @@ define <vscale x 2 x bfloat> @vfptrunc_nxv2f64_nxv2bf16(<vscale x 2 x double> %v
 ; CHECK-LABEL: vfptrunc_nxv2f64_nxv2bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fptrunc.nxv2bf16.nxv2f64(<vscale x 2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x bfloat> %evec
@@ -199,8 +190,7 @@ define <vscale x 2 x bfloat> @vfptrunc_nxv2f32_nxv2bf16(<vscale x 2 x float> %va
 ; CHECK-LABEL: vfptrunc_nxv2f32_nxv2bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fptrunc.nxv2bf16.nxv2f32(<vscale x 2 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x bfloat> %evec
@@ -211,9 +201,9 @@ define <vscale x 4 x bfloat> @vfptrunc_nxv4f64_nxv4bf16(<vscale x 4 x double> %v
 ; CHECK-LABEL: vfptrunc_nxv4f64_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fptrunc.nxv4bf16.nxv4f64(<vscale x 4 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x bfloat> %evec
@@ -224,8 +214,7 @@ define <vscale x 4 x bfloat> @vfptrunc_nxv4f32_nxv4bf16(<vscale x 4 x float> %va
 ; CHECK-LABEL: vfptrunc_nxv4f32_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fptrunc.nxv4bf16.nxv4f32(<vscale x 4 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x bfloat> %evec
@@ -236,9 +225,9 @@ define <vscale x 8 x bfloat> @vfptrunc_nxv8f64_nxv8bf16(<vscale x 8 x double> %v
 ; CHECK-LABEL: vfptrunc_nxv8f64_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fptrunc.nxv8bf16.nxv8f64(<vscale x 8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x bfloat> %evec
@@ -249,8 +238,7 @@ define <vscale x 8 x bfloat> @vfptrunc_nxv8f32_nxv8bf16(<vscale x 8 x float> %va
 ; CHECK-LABEL: vfptrunc_nxv8f32_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fptrunc.nxv8bf16.nxv8f32(<vscale x 8 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x bfloat> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll
index dcec2200b1308..86aaee5e86e4e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll
@@ -13,8 +13,7 @@ define <vscale x 1 x half> @vfptrunc_nxv1f32_nxv1f16(<vscale x 1 x float> %va) {
 ; CHECK-LABEL: vfptrunc_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 1 x float> %va to <vscale x 1 x half>
   ret <vscale x 1 x half> %evec
@@ -25,8 +24,7 @@ define <vscale x 2 x half> @vfptrunc_nxv2f32_nxv2f16(<vscale x 2 x float> %va) {
 ; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 2 x float> %va to <vscale x 2 x half>
   ret <vscale x 2 x half> %evec
@@ -37,8 +35,7 @@ define <vscale x 4 x half> @vfptrunc_nxv4f32_nxv4f16(<vscale x 4 x float> %va) {
 ; CHECK-LABEL: vfptrunc_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 4 x float> %va to <vscale x 4 x half>
   ret <vscale x 4 x half> %evec
@@ -49,8 +46,7 @@ define <vscale x 8 x half> @vfptrunc_nxv8f32_nxv8f16(<vscale x 8 x float> %va) {
 ; CHECK-LABEL: vfptrunc_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 8 x float> %va to <vscale x 8 x half>
   ret <vscale x 8 x half> %evec
@@ -61,8 +57,7 @@ define <vscale x 16 x half> @vfptrunc_nxv16f32_nxv16f16(<vscale x 16 x float> %v
 ; CHECK-LABEL: vfptrunc_nxv16f32_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 16 x float> %va to <vscale x 16 x half>
   ret <vscale x 16 x half> %evec
@@ -73,9 +68,9 @@ define <vscale x 1 x half> @vfptrunc_nxv1f64_nxv1f16(<vscale x 1 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv1f64_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 1 x double> %va to <vscale x 1 x half>
   ret <vscale x 1 x half> %evec
@@ -86,8 +81,7 @@ define <vscale x 1 x float> @vfptrunc_nxv1f64_nxv1f32(<vscale x 1 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 1 x double> %va to <vscale x 1 x float>
   ret <vscale x 1 x float> %evec
@@ -98,9 +92,9 @@ define <vscale x 2 x half> @vfptrunc_nxv2f64_nxv2f16(<vscale x 2 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv2f64_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 2 x double> %va to <vscale x 2 x half>
   ret <vscale x 2 x half> %evec
@@ -111,8 +105,7 @@ define <vscale x 2 x float> @vfptrunc_nxv2f64_nxv2f32(<vscale x 2 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 2 x double> %va to <vscale x 2 x float>
   ret <vscale x 2 x float> %evec
@@ -123,9 +116,9 @@ define <vscale x 4 x half> @vfptrunc_nxv4f64_nxv4f16(<vscale x 4 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv4f64_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 4 x double> %va to <vscale x 4 x half>
   ret <vscale x 4 x half> %evec
@@ -136,8 +129,7 @@ define <vscale x 4 x float> @vfptrunc_nxv4f64_nxv4f32(<vscale x 4 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 4 x double> %va to <vscale x 4 x float>
   ret <vscale x 4 x float> %evec
@@ -148,9 +140,9 @@ define <vscale x 8 x half> @vfptrunc_nxv8f64_nxv8f16(<vscale x 8 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv8f64_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v16, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v16
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 8 x double> %va to <vscale x 8 x half>
   ret <vscale x 8 x half> %evec
@@ -161,8 +153,7 @@ define <vscale x 8 x float> @vfptrunc_nxv8f64_nxv8f32(<vscale x 8 x double> %va)
 ; CHECK-LABEL: vfptrunc_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 8 x double> %va to <vscale x 8 x float>
   ret <vscale x 8 x float> %evec
@@ -173,8 +164,7 @@ define <vscale x 1 x bfloat> @vfptrunc_nxv1f32_nxv1bf16(<vscale x 1 x float> %va
 ; CHECK-LABEL: vfptrunc_nxv1f32_nxv1bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 1 x float> %va to <vscale x 1 x bfloat>
   ret <vscale x 1 x bfloat> %evec
@@ -185,8 +175,7 @@ define <vscale x 2 x bfloat> @vfptrunc_nxv2f32_nxv2bf16(<vscale x 2 x float> %va
 ; CHECK-LABEL: vfptrunc_nxv2f32_nxv2bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 2 x float> %va to <vscale x 2 x bfloat>
   ret <vscale x 2 x bfloat> %evec
@@ -197,8 +186,7 @@ define <vscale x 4 x bfloat> @vfptrunc_nxv4f32_nxv4bf16(<vscale x 4 x float> %va
 ; CHECK-LABEL: vfptrunc_nxv4f32_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 4 x float> %va to <vscale x 4 x bfloat>
   ret <vscale x 4 x bfloat> %evec
@@ -209,8 +197,7 @@ define <vscale x 8 x bfloat> @vfptrunc_nxv8f32_nxv8bf16(<vscale x 8 x float> %va
 ; CHECK-LABEL: vfptrunc_nxv8f32_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 8 x float> %va to <vscale x 8 x bfloat>
   ret <vscale x 8 x bfloat> %evec
@@ -221,8 +208,7 @@ define <vscale x 16 x bfloat> @vfptrunc_nxv16f32_nxv16bf16(<vscale x 16 x float>
 ; CHECK-LABEL: vfptrunc_nxv16f32_nxv16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 16 x float> %va to <vscale x 16 x bfloat>
   ret <vscale x 16 x bfloat> %evec
@@ -233,9 +219,9 @@ define <vscale x 1 x bfloat> @vfptrunc_nxv1f64_nxv1bf16(<vscale x 1 x double> %v
 ; CHECK-LABEL: vfptrunc_nxv1f64_nxv1bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = fptrunc <vscale x 1 x double> %va to <vscale x 1 x bfloat>
   ret <vscale x 1 x bfloat> %evec
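
All of the vfptrunc changes above are instances of the narrowing side of the same rule: a narrowing op's destination may overlap the lowest-numbered part of its source register group (the spec's own example is vnsrl.wi v0, v0, 3 being legal), so the early-clobber-induced temporary-plus-vmv1r.v sequence collapses to a single instruction writing back into the source register. A minimal sketch, not part of the patch; the function name is illustrative:

; Compile with, e.g.: llc -mtriple=riscv64 -mattr=+v
define <vscale x 2 x half> @narrow_overlap_sketch(<vscale x 2 x float> %va) {
  ; Before: vfncvt.f.f.w v9, v8 ; vmv1r.v v8, v9
  ; After:  vfncvt.f.f.w v8, v8   (dest overlaps the low part of the source group)
  %evec = fptrunc <vscale x 2 x float> %va to <vscale x 2 x half>
  ret <vscale x 2 x half> %evec
}
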
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
index 03de35c212296..347ce31b494ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
@@ -10,8 +10,7 @@ define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32(<vscale x 2 x float> %a, <v
 ; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x half> %v
@@ -21,8 +20,7 @@ define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32_unmasked(<vscale x 2 x floa
 ; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
   ret <vscale x 2 x half> %v
@@ -34,9 +32,9 @@ define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64(<vscale x 2 x double> %a, <
 ; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8, v0.t
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x half> %v
@@ -46,9 +44,9 @@ define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64_unmasked(<vscale x 2 x doub
 ; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
   ret <vscale x 2 x half> %v
@@ -60,8 +58,7 @@ define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64(<vscale x 2 x double> %a,
 ; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x float> %v
@@ -71,8 +68,7 @@ define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64_unmasked(<vscale x 2 x dou
 ; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
   ret <vscale x 2 x float> %v
@@ -84,8 +80,7 @@ define <vscale x 7 x float> @vfptrunc_nxv7f32_nxv7f64(<vscale x 7 x double> %a,
 ; CHECK-LABEL: vfptrunc_nxv7f32_nxv7f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v16, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 7 x float> @llvm.vp.fptrunc.nxv7f64.nxv7f32(<vscale x 7 x double> %a, <vscale x 7 x i1> %m, i32 %vl)
   ret <vscale x 7 x float> %v
@@ -212,8 +207,7 @@ define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f32(<vscale x 2 x float> %a,
 ; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x bfloat> %v
@@ -223,8 +217,7 @@ define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f32_unmasked(<vscale x 2 x f
 ; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
   ret <vscale x 2 x bfloat> %v
@@ -236,9 +229,9 @@ define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f64(<vscale x 2 x double> %a
 ; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8, v0.t
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x bfloat> %v
@@ -248,9 +241,9 @@ define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f64_unmasked(<vscale x 2 x d
 ; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
   ret <vscale x 2 x bfloat> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll
index eeb5f3bc984d3..7815c1d276285 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll
@@ -18,9 +18,9 @@ define <vscale x 1 x bfloat> @vfsqrt_nxv1bf16(<vscale x 1 x bfloat> %v) strictfp
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsqrt.v v9, v9
+; CHECK-NEXT:    vfsqrt.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %r = call <vscale x 1 x bfloat> @llvm.experimental.constrained.sqrt.nxv1bf16(<vscale x 1 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x bfloat> %r
@@ -33,9 +33,9 @@ define <vscale x 2 x bfloat> @vfsqrt_nxv2bf16(<vscale x 2 x bfloat> %v) strictfp
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsqrt.v v9, v9
+; CHECK-NEXT:    vfsqrt.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %r = call <vscale x 2 x bfloat> @llvm.experimental.constrained.sqrt.nxv2bf16(<vscale x 2 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x bfloat> %r
@@ -48,9 +48,9 @@ define <vscale x 4 x bfloat> @vfsqrt_nxv4bf16(<vscale x 4 x bfloat> %v) strictfp
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsqrt.v v10, v10
+; CHECK-NEXT:    vfsqrt.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %r = call <vscale x 4 x bfloat> @llvm.experimental.constrained.sqrt.nxv4bf16(<vscale x 4 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x bfloat> %r
@@ -63,9 +63,9 @@ define <vscale x 8 x bfloat> @vfsqrt_nxv8bf16(<vscale x 8 x bfloat> %v) strictfp
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsqrt.v v12, v12
+; CHECK-NEXT:    vfsqrt.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %r = call <vscale x 8 x bfloat> @llvm.experimental.constrained.sqrt.nxv8bf16(<vscale x 8 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x bfloat> %r
@@ -78,9 +78,9 @@ define <vscale x 16 x bfloat> @vfsqrt_nxv16bf16(<vscale x 16 x bfloat> %v) stric
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    vfsqrt.v v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %r = call <vscale x 16 x bfloat> @llvm.experimental.constrained.sqrt.nxv16bf16(<vscale x 16 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x bfloat> %r
@@ -95,9 +95,9 @@ define <vscale x 32 x bfloat> @vfsqrt_nxv32bf16(<vscale x 32 x bfloat> %v) stric
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfsqrt.v v16, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsqrt.v v24, v24
+; CHECK-NEXT:    vfsqrt.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
@@ -120,9 +120,9 @@ define <vscale x 1 x half> @vfsqrt_nxv1f16(<vscale x 1 x half> %v) strictfp {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 1 x half> @llvm.experimental.constrained.sqrt.nxv1f16(<vscale x 1 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x half> %r
@@ -142,9 +142,9 @@ define <vscale x 2 x half> @vfsqrt_nxv2f16(<vscale x 2 x half> %v) strictfp {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 2 x half> @llvm.experimental.constrained.sqrt.nxv2f16(<vscale x 2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x half> %r
@@ -164,9 +164,9 @@ define <vscale x 4 x half> @vfsqrt_nxv4f16(<vscale x 4 x half> %v) strictfp {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v10, v10
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 4 x half> @llvm.experimental.constrained.sqrt.nxv4f16(<vscale x 4 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x half> %r
@@ -186,9 +186,9 @@ define <vscale x 8 x half> @vfsqrt_nxv8f16(<vscale x 8 x half> %v) strictfp {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v12, v12
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 8 x half> @llvm.experimental.constrained.sqrt.nxv8f16(<vscale x 8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x half> %r
@@ -208,9 +208,9 @@ define <vscale x 16 x half> @vfsqrt_nxv16f16(<vscale x 16 x half> %v) strictfp {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 16 x half> @llvm.experimental.constrained.sqrt.nxv16f16(<vscale x 16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x half> %r
@@ -232,9 +232,9 @@ define <vscale x 32 x half> @vfsqrt_nxv32f16(<vscale x 32 x half> %v) strictfp {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v24, v24
+; ZVFHMIN-NEXT:    vfsqrt.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll
index 6d7662db2b157..cda1b9af0526b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll
@@ -18,9 +18,9 @@ define <vscale x 1 x bfloat> @vfsqrt_nxv1bf16(<vscale x 1 x bfloat> %v) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsqrt.v v9, v9
+; CHECK-NEXT:    vfsqrt.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %r = call <vscale x 1 x bfloat> @llvm.sqrt.nxv1bf16(<vscale x 1 x bfloat> %v)
   ret <vscale x 1 x bfloat> %r
@@ -32,9 +32,9 @@ define <vscale x 2 x bfloat> @vfsqrt_nxv2bf16(<vscale x 2 x bfloat> %v) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsqrt.v v9, v9
+; CHECK-NEXT:    vfsqrt.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %r = call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> %v)
   ret <vscale x 2 x bfloat> %r
@@ -46,9 +46,9 @@ define <vscale x 4 x bfloat> @vfsqrt_nxv4bf16(<vscale x 4 x bfloat> %v) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsqrt.v v10, v10
+; CHECK-NEXT:    vfsqrt.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %r = call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> %v)
   ret <vscale x 4 x bfloat> %r
@@ -60,9 +60,9 @@ define <vscale x 8 x bfloat> @vfsqrt_nxv8bf16(<vscale x 8 x bfloat> %v) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsqrt.v v12, v12
+; CHECK-NEXT:    vfsqrt.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %r = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> %v)
   ret <vscale x 8 x bfloat> %r
@@ -74,9 +74,9 @@ define <vscale x 16 x bfloat> @vfsqrt_nxv16bf16(<vscale x 16 x bfloat> %v) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    vfsqrt.v v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %r = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> %v)
   ret <vscale x 16 x bfloat> %r
@@ -90,9 +90,9 @@ define <vscale x 32 x bfloat> @vfsqrt_nxv32bf16(<vscale x 32 x bfloat> %v) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfsqrt.v v16, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsqrt.v v24, v24
+; CHECK-NEXT:    vfsqrt.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
@@ -115,9 +115,9 @@ define <vscale x 1 x half> @vfsqrt_nxv1f16(<vscale x 1 x half> %v) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 1 x half> @llvm.sqrt.nxv1f16(<vscale x 1 x half> %v)
   ret <vscale x 1 x half> %r
@@ -137,9 +137,9 @@ define <vscale x 2 x half> @vfsqrt_nxv2f16(<vscale x 2 x half> %v) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> %v)
   ret <vscale x 2 x half> %r
@@ -159,9 +159,9 @@ define <vscale x 4 x half> @vfsqrt_nxv4f16(<vscale x 4 x half> %v) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v10, v10
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> %v)
   ret <vscale x 4 x half> %r
@@ -181,9 +181,9 @@ define <vscale x 8 x half> @vfsqrt_nxv8f16(<vscale x 8 x half> %v) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v12, v12
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> %v)
   ret <vscale x 8 x half> %r
@@ -203,9 +203,9 @@ define <vscale x 16 x half> @vfsqrt_nxv16f16(<vscale x 16 x half> %v) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %r = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> %v)
   ret <vscale x 16 x half> %r
@@ -227,9 +227,9 @@ define <vscale x 32 x half> @vfsqrt_nxv32f16(<vscale x 32 x half> %v) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v24, v24
+; ZVFHMIN-NEXT:    vfsqrt.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll
index 4336b27eb134a..a35f1a4e481f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll
@@ -20,9 +20,9 @@ define <vscale x 1 x bfloat> @vfsqrt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsc
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsqrt.v v9, v9, v0.t
+; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -34,9 +34,9 @@ define <vscale x 1 x bfloat> @vfsqrt_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat>
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsqrt.v v9, v9
+; CHECK-NEXT:    vfsqrt.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -50,9 +50,9 @@ define <vscale x 2 x bfloat> @vfsqrt_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsc
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsqrt.v v9, v9, v0.t
+; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -64,9 +64,9 @@ define <vscale x 2 x bfloat> @vfsqrt_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat>
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsqrt.v v9, v9
+; CHECK-NEXT:    vfsqrt.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -80,9 +80,9 @@ define <vscale x 4 x bfloat> @vfsqrt_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsc
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsqrt.v v10, v10, v0.t
+; CHECK-NEXT:    vfsqrt.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -94,9 +94,9 @@ define <vscale x 4 x bfloat> @vfsqrt_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat>
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsqrt.v v10, v10
+; CHECK-NEXT:    vfsqrt.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -110,9 +110,9 @@ define <vscale x 8 x bfloat> @vfsqrt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsc
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsqrt.v v12, v12, v0.t
+; CHECK-NEXT:    vfsqrt.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -124,9 +124,9 @@ define <vscale x 8 x bfloat> @vfsqrt_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat>
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsqrt.v v12, v12
+; CHECK-NEXT:    vfsqrt.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -140,9 +140,9 @@ define <vscale x 16 x bfloat> @vfsqrt_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsqrt.v v16, v16, v0.t
+; CHECK-NEXT:    vfsqrt.v v8, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -154,9 +154,9 @@ define <vscale x 16 x bfloat> @vfsqrt_vv_nxv16bf16_unmasked(<vscale x 16 x bfloa
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsqrt.v v16, v16
+; CHECK-NEXT:    vfsqrt.v v8, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -247,9 +247,9 @@ define <vscale x 1 x half> @vfsqrt_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x half> %v
@@ -267,9 +267,9 @@ define <vscale x 1 x half> @vfsqrt_vv_nxv1f16_unmasked(<vscale x 1 x half> %va,
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x half> %v
@@ -289,9 +289,9 @@ define <vscale x 2 x half> @vfsqrt_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -309,9 +309,9 @@ define <vscale x 2 x half> @vfsqrt_vv_nxv2f16_unmasked(<vscale x 2 x half> %va,
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -331,9 +331,9 @@ define <vscale x 4 x half> @vfsqrt_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v10, v10, v0.t
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x half> %v
@@ -351,9 +351,9 @@ define <vscale x 4 x half> @vfsqrt_vv_nxv4f16_unmasked(<vscale x 4 x half> %va,
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v10, v10
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x half> %v
@@ -373,9 +373,9 @@ define <vscale x 8 x half> @vfsqrt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v12, v12, v0.t
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -393,9 +393,9 @@ define <vscale x 8 x half> @vfsqrt_vv_nxv8f16_unmasked(<vscale x 8 x half> %va,
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v12, v12
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x half> %v
@@ -415,9 +415,9 @@ define <vscale x 16 x half> @vfsqrt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v16, v16, v0.t
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -435,9 +435,9 @@ define <vscale x 16 x half> @vfsqrt_vv_nxv16f16_unmasked(<vscale x 16 x half> %v
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
+; ZVFHMIN-NEXT:    vfsqrt.v v8, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x half> %v
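
Every vfsqrt hunk above has the same shape: widen to e32, operate, narrow back to e16. The V specification already permits a narrowing instruction's destination to overlap its source register group when the overlap is in the lowest-numbered part of that group; once this rule is modeled directly instead of a blanket early-clobber, vfncvt(bf16).f.f.w can read and write v8 in one instruction, and the wide intermediate no longer pins a second register group. A minimal standalone reproducer of the narrowing case (a hypothetical test, not part of this patch; the CHECK lines are illustrative of the post-patch allocation seen above, assuming -mattr=+v,+zvfbfmin):

; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
define <vscale x 4 x bfloat> @trunc_overlap(<vscale x 4 x float> %w) {
; CHECK-LABEL: trunc_overlap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
; CHECK-NEXT:    ret
  %n = fptrunc <vscale x 4 x float> %w to <vscale x 4 x bfloat>
  ret <vscale x 4 x bfloat> %n
}

Here the m2 source group v8-v9 arrives in v8 and the m1 result overlaps its lowest-numbered register, which is exactly the overlap the specification allows and the early-clobber model forbade.
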
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll
index cd8f890251c77..574a2b5827369 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll
@@ -19,9 +19,9 @@ define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v9, v10
+; CHECK-NEXT:    vfsub.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -37,9 +37,9 @@ define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v10, v8
+; CHECK-NEXT:    vfsub.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -54,9 +54,9 @@ define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v9, v10
+; CHECK-NEXT:    vfsub.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -72,9 +72,9 @@ define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v10, v8
+; CHECK-NEXT:    vfsub.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -89,9 +89,9 @@ define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsub.vv v10, v12, v10
+; CHECK-NEXT:    vfsub.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -103,13 +103,13 @@ define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsub.vv v10, v10, v8
+; CHECK-NEXT:    vfsub.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -124,9 +124,9 @@ define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsub.vv v12, v16, v12
+; CHECK-NEXT:    vfsub.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -138,13 +138,13 @@ define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsub.vv v12, v12, v8
+; CHECK-NEXT:    vfsub.vv v8, v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -157,13 +157,13 @@ define <vscale x 8 x bfloat> @vfsub_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsub.vv v12, v8, v12
+; CHECK-NEXT:    vfsub.vv v8, v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -178,9 +178,9 @@ define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vv v16, v24, v16
+; CHECK-NEXT:    vfsub.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -192,13 +192,13 @@ define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v24, a0
+; CHECK-NEXT:    vmv.v.x v12, a0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vv v16, v16, v8
+; CHECK-NEXT:    vfsub.vv v8, v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -224,9 +224,9 @@ define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vv v0, v0, v8
+; CHECK-NEXT:    vfsub.vv v8, v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfsub.vv v16, v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -266,9 +266,9 @@ define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vv v0, v8, v0
+; CHECK-NEXT:    vfsub.vv v8, v8, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfsub.vv v16, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -300,9 +300,9 @@ define <vscale x 1 x half> @vfsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 1 x half> @llvm.experimental.constrained.fsub.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -324,9 +324,9 @@ define <vscale x 1 x half> @vfsub_vf_nxv1f16(<vscale x 1 x half> %va, half %b) s
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -348,9 +348,9 @@ define <vscale x 2 x half> @vfsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 2 x half> @llvm.experimental.constrained.fsub.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -372,9 +372,9 @@ define <vscale x 2 x half> @vfsub_vf_nxv2f16(<vscale x 2 x half> %va, half %b) s
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -396,9 +396,9 @@ define <vscale x 4 x half> @vfsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 4 x half> @llvm.experimental.constrained.fsub.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -416,13 +416,13 @@ define <vscale x 4 x half> @vfsub_vf_nxv4f16(<vscale x 4 x half> %va, half %b) s
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -444,9 +444,9 @@ define <vscale x 8 x half> @vfsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfsub.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 8 x half> @llvm.experimental.constrained.fsub.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -464,13 +464,13 @@ define <vscale x 8 x half> @vfsub_vf_nxv8f16(<vscale x 8 x half> %va, half %b) s
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vmv.v.x v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -489,13 +489,13 @@ define <vscale x 8 x half> @vfsub_fv_nxv8f16(<vscale x 8 x half> %va, half %b) s
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vmv.v.x v10, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v8, v12
+; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -517,9 +517,9 @@ define <vscale x 16 x half> @vfsub_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfsub.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
 entry:
   %vc = call <vscale x 16 x half> @llvm.experimental.constrained.fsub.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -537,13 +537,13 @@ define <vscale x 16 x half> @vfsub_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a0
+; ZVFHMIN-NEXT:    vmv.v.x v12, a0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -576,9 +576,9 @@ define <vscale x 32 x half> @vfsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v16, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -624,9 +624,9 @@ define <vscale x 32 x half> @vfsub_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v16, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
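
The vfsub_vf/vfsub_fv hunks above show the companion widening rule at work: a widening instruction's source may overlap its destination when the source EMUL is at least 1 and the source lies in the highest-numbered part of the destination group. That is why the scalar splat migrates from v12/v16/v24 down to v9/v10/v12 in these hunks. Annotated from the nxv8bf16 case above (comments added for this writeup):

# before: the early-clobber m4 destination v8-v11 excluded v8-v11 as a
# source, pushing the m2 splat out to v16
vmv.v.x v16, a0
vfwcvtbf16.f.f.v v8, v16
# after: v10-v11 is the highest-numbered half of the destination group
# v8-v11, an overlap the specification explicitly allows
vmv.v.x v10, a0
vfwcvtbf16.f.f.v v8, v10

Note the fractional-LMUL hunks are unchanged in this respect: there the source EMUL is below 1, the rule does not apply, and source and destination stay disjoint.
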
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll
index 550d8aad3ee20..ac5a58b8d21b6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll
@@ -19,9 +19,9 @@ define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v9, v10
+; CHECK-NEXT:    vfsub.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fsub <vscale x 1 x bfloat> %va, %vb
   ret <vscale x 1 x bfloat> %vc
@@ -34,9 +34,9 @@ define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsub.vf v9, v9, fa5
+; CHECK-NEXT:    vfsub.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -51,9 +51,9 @@ define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v9, v10
+; CHECK-NEXT:    vfsub.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fsub <vscale x 2 x bfloat> %va, %vb
   ret <vscale x 2 x bfloat> %vc
@@ -66,9 +66,9 @@ define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsub.vf v9, v9, fa5
+; CHECK-NEXT:    vfsub.vf v8, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -83,9 +83,9 @@ define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsub.vv v10, v12, v10
+; CHECK-NEXT:    vfsub.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fsub <vscale x 4 x bfloat> %va, %vb
   ret <vscale x 4 x bfloat> %vc
@@ -98,9 +98,9 @@ define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsub.vf v10, v10, fa5
+; CHECK-NEXT:    vfsub.vf v8, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -115,9 +115,9 @@ define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsub.vv v12, v16, v12
+; CHECK-NEXT:    vfsub.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fsub <vscale x 8 x bfloat> %va, %vb
   ret <vscale x 8 x bfloat> %vc
@@ -130,9 +130,9 @@ define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsub.vf v12, v12, fa5
+; CHECK-NEXT:    vfsub.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -147,9 +147,9 @@ define <vscale x 8 x bfloat> @vfsub_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfrsub.vf v12, v12, fa5
+; CHECK-NEXT:    vfrsub.vf v8, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -164,9 +164,9 @@ define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vv v16, v24, v16
+; CHECK-NEXT:    vfsub.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %vc = fsub <vscale x 16 x bfloat> %va, %vb
   ret <vscale x 16 x bfloat> %vc
@@ -179,9 +179,9 @@ define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vf v16, v16, fa5
+; CHECK-NEXT:    vfsub.vf v8, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -207,9 +207,9 @@ define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vv v0, v0, v8
+; CHECK-NEXT:    vfsub.vv v8, v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfsub.vv v16, v16, v24
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -248,9 +248,9 @@ define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vv v0, v8, v0
+; CHECK-NEXT:    vfsub.vv v8, v8, v0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfsub.vv v16, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -281,9 +281,9 @@ define <vscale x 1 x half> @vfsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fsub <vscale x 1 x half> %va, %vb
   ret <vscale x 1 x half> %vc
@@ -302,9 +302,9 @@ define <vscale x 1 x half> @vfsub_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfsub.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -325,9 +325,9 @@ define <vscale x 2 x half> @vfsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fsub <vscale x 2 x half> %va, %vb
   ret <vscale x 2 x half> %vc
@@ -346,9 +346,9 @@ define <vscale x 2 x half> @vfsub_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vfsub.vf v8, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -369,9 +369,9 @@ define <vscale x 4 x half> @vfsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fsub <vscale x 4 x half> %va, %vb
   ret <vscale x 4 x half> %vc
@@ -390,9 +390,9 @@ define <vscale x 4 x half> @vfsub_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vf v10, v10, fa5
+; ZVFHMIN-NEXT:    vfsub.vf v8, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -413,9 +413,9 @@ define <vscale x 8 x half> @vfsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfsub.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fsub <vscale x 8 x half> %va, %vb
   ret <vscale x 8 x half> %vc
@@ -434,9 +434,9 @@ define <vscale x 8 x half> @vfsub_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfsub.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -457,9 +457,9 @@ define <vscale x 8 x half> @vfsub_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfrsub.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vfrsub.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -480,9 +480,9 @@ define <vscale x 16 x half> @vfsub_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfsub.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %vc = fsub <vscale x 16 x half> %va, %vb
   ret <vscale x 16 x half> %vc
@@ -501,9 +501,9 @@ define <vscale x 16 x half> @vfsub_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vf v16, v16, fa5
+; ZVFHMIN-NEXT:    vfsub.vf v8, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -535,9 +535,9 @@ define <vscale x 32 x half> @vfsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v0, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v16, v16, v24
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
@@ -582,9 +582,9 @@ define <vscale x 32 x half> @vfsub_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    addi a0, sp, 16
 ; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v16, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
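
The nxv32 hunks above, where the type is split and half the value round-trips through a spill slot, combine this with the simplest of the three overlap rules: when destination and source EEWs are equal, any overlap is legal, so the wide vfsub was always free to choose its destination. The gain comes from steering that choice so the following narrow can overlap it. Re-quoted from the vfsub_vf_nxv32f16 sequence above, with annotations:

vsetvli zero, zero, e32, m8, ta, ma
vfsub.vv v8, v8, v0    # same EEW: the v8 m8 destination may alias its own source group
vsetvli zero, zero, e16, m4, ta, ma
vfncvt.f.f.w v8, v8    # the m4 result occupies the lowest half of the m8 source: legal narrowing overlap

The first half of the result is now computed and narrowed entirely within v8-v15 instead of occupying both v0-v7 and v8-v11, so the sequence touches one m8 group rather than two: the register-pressure reduction this patch is measuring.
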
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
index dc0bfbd0f76dd..dd8e72a9d8e62 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
@@ -21,9 +21,9 @@ define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vfsub.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -36,9 +36,9 @@ define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v9, v10
+; CHECK-NEXT:    vfsub.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -53,9 +53,9 @@ define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v10, v8, v0.t
+; CHECK-NEXT:    vfsub.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -72,9 +72,9 @@ define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v10, v8
+; CHECK-NEXT:    vfsub.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -91,9 +91,9 @@ define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vfsub.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -106,9 +106,9 @@ define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v9, v10
+; CHECK-NEXT:    vfsub.vv v8, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -123,9 +123,9 @@ define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v10, v8, v0.t
+; CHECK-NEXT:    vfsub.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -142,9 +142,9 @@ define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v10, v8
+; CHECK-NEXT:    vfsub.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -161,9 +161,9 @@ define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsub.vv v10, v12, v10, v0.t
+; CHECK-NEXT:    vfsub.vv v8, v12, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -176,9 +176,9 @@ define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsub.vv v10, v12, v10
+; CHECK-NEXT:    vfsub.vv v8, v12, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -189,13 +189,13 @@ define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a1
+; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsub.vv v10, v10, v8, v0.t
+; CHECK-NEXT:    vfsub.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -208,13 +208,13 @@ define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a1
+; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsub.vv v10, v10, v8
+; CHECK-NEXT:    vfsub.vv v8, v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -231,9 +231,9 @@ define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsub.vv v12, v16, v12, v0.t
+; CHECK-NEXT:    vfsub.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -246,9 +246,9 @@ define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsub.vv v12, v16, v12
+; CHECK-NEXT:    vfsub.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -259,13 +259,13 @@ define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a1
+; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsub.vv v12, v12, v8, v0.t
+; CHECK-NEXT:    vfsub.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -278,13 +278,13 @@ define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a1
+; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsub.vv v12, v12, v8
+; CHECK-NEXT:    vfsub.vv v8, v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -301,9 +301,9 @@ define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vv v16, v24, v16, v0.t
+; CHECK-NEXT:    vfsub.vv v8, v24, v16, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -316,9 +316,9 @@ define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vv v16, v24, v16
+; CHECK-NEXT:    vfsub.vv v8, v24, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x bfloat> %v
@@ -329,13 +329,13 @@ define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v24, a1
+; CHECK-NEXT:    vmv.v.x v12, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    vfsub.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -348,13 +348,13 @@ define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v24, a1
+; CHECK-NEXT:    vmv.v.x v12, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vv v16, v16, v8
+; CHECK-NEXT:    vfsub.vv v8, v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -483,7 +483,7 @@ define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    vmv1r.v v7, v0
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    vmv.v.x v24, a1
+; CHECK-NEXT:    vmv.v.x v16, a1
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a0, a1
@@ -496,9 +496,9 @@ define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    add a3, sp, a3
 ; CHECK-NEXT:    addi a3, a3, 16
-; CHECK-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v20, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfsub.vv v16, v24, v16, v0.t
@@ -550,20 +550,20 @@ define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat
 ; CHECK-NEXT:    fmv.x.h a1, fa0
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmset.m v24
-; CHECK-NEXT:    vmv.v.x v16, a1
+; CHECK-NEXT:    vmset.m v16
+; CHECK-NEXT:    vmv.v.x v24, a1
 ; CHECK-NEXT:    slli a1, a2, 1
 ; CHECK-NEXT:    srli a2, a2, 2
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v0, v24, a2
+; CHECK-NEXT:    vslidedown.vx v0, v16, a2
 ; CHECK-NEXT:    sltu a2, a0, a3
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    and a2, a2, a3
 ; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v28, v0.t
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vfsub.vv v16, v16, v24, v0.t
@@ -609,9 +609,9 @@ define <vscale x 1 x half> @vfsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x half> %v
@@ -630,9 +630,9 @@ define <vscale x 1 x half> @vfsub_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x half> %v
@@ -653,9 +653,9 @@ define <vscale x 1 x half> @vfsub_vf_nxv1f16(<vscale x 1 x half> %va, half %b, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -678,9 +678,9 @@ define <vscale x 1 x half> @vfsub_vf_nxv1f16_unmasked(<vscale x 1 x half> %va, h
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -703,9 +703,9 @@ define <vscale x 2 x half> @vfsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -724,9 +724,9 @@ define <vscale x 2 x half> @vfsub_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vfsub.vv v8, v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -747,9 +747,9 @@ define <vscale x 2 x half> @vfsub_vf_nxv2f16(<vscale x 2 x half> %va, half %b, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -772,9 +772,9 @@ define <vscale x 2 x half> @vfsub_vf_nxv2f16_unmasked(<vscale x 2 x half> %va, h
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -797,9 +797,9 @@ define <vscale x 4 x half> @vfsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x half> %v
@@ -818,9 +818,9 @@ define <vscale x 4 x half> @vfsub_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x half> %v
@@ -837,13 +837,13 @@ define <vscale x 4 x half> @vfsub_vf_nxv4f16(<vscale x 4 x half> %va, half %b, <
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v10, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -862,13 +862,13 @@ define <vscale x 4 x half> @vfsub_vf_nxv4f16_unmasked(<vscale x 4 x half> %va, h
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -891,9 +891,9 @@ define <vscale x 8 x half> @vfsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -912,9 +912,9 @@ define <vscale x 8 x half> @vfsub_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfsub.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x half> %v
@@ -931,13 +931,13 @@ define <vscale x 8 x half> @vfsub_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v12, v8, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -956,13 +956,13 @@ define <vscale x 8 x half> @vfsub_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, h
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -985,9 +985,9 @@ define <vscale x 16 x half> @vfsub_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
@@ -1006,9 +1006,9 @@ define <vscale x 16 x half> @vfsub_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vfsub.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x half> %v
@@ -1025,13 +1025,13 @@ define <vscale x 16 x half> @vfsub_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v16, v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfsub.vv v8, v16, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1050,13 +1050,13 @@ define <vscale x 16 x half> @vfsub_vf_nxv16f16_unmasked(<vscale x 16 x half> %va
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vfsub.vv v8, v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1203,7 +1203,7 @@ define <vscale x 32 x half> @vfsub_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    vmv1r.v v7, v0
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
-; ZVFHMIN-NEXT:    vmv.v.x v24, a1
+; ZVFHMIN-NEXT:    vmv.v.x v16, a1
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
@@ -1216,9 +1216,9 @@ define <vscale x 32 x half> @vfsub_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    slli a3, a3, 3
 ; ZVFHMIN-NEXT:    add a3, sp, a3
 ; ZVFHMIN-NEXT:    addi a3, a3, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v16, v24, v16, v0.t
@@ -1276,20 +1276,20 @@ define <vscale x 32 x half> @vfsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %va
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v24
-; ZVFHMIN-NEXT:    vmv.v.x v16, a1
+; ZVFHMIN-NEXT:    vmset.m v16
+; ZVFHMIN-NEXT:    vmv.v.x v24, a1
 ; ZVFHMIN-NEXT:    slli a1, a2, 1
 ; ZVFHMIN-NEXT:    srli a2, a2, 2
 ; ZVFHMIN-NEXT:    sub a3, a0, a1
 ; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v0, v24, a2
+; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
 ; ZVFHMIN-NEXT:    sltu a2, a0, a3
 ; ZVFHMIN-NEXT:    addi a2, a2, -1
 ; ZVFHMIN-NEXT:    and a2, a2, a3
 ; ZVFHMIN-NEXT:    addi a3, sp, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v24, (a3) # vscale x 64-byte Folded Spill
 ; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v16, v16, v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll
index 8d41514161c61..a0f2ed9658d75 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll
@@ -88,9 +88,8 @@ define <vscale x 2 x double> @vfwadd_vv_nxv2f64(<vscale x 2 x float> %va, <vscal
 ; CHECK-LABEL: vfwadd_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vfwadd.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vfwadd.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %vc = fpext <vscale x 2 x float> %va to <vscale x 2 x double>
   %vd = fpext <vscale x 2 x float> %vb to <vscale x 2 x double>
@@ -102,8 +101,8 @@ define <vscale x 2 x double> @vfwadd_vf_nxv2f64(<vscale x 2 x float> %va, float
 ; CHECK-LABEL: vfwadd_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
@@ -117,8 +116,8 @@ define <vscale x 2 x double> @vfwadd_vf_nxv2f64_2(<vscale x 2 x float> %va, floa
 ; CHECK-LABEL: vfwadd_vf_nxv2f64_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
 ; CHECK-NEXT:    ret
   %fpext = fpext float %b to double
   %head = insertelement <vscale x 2 x double> poison, double %fpext, i32 0
@@ -169,9 +168,8 @@ define <vscale x 4 x double> @vfwadd_vv_nxv4f64(<vscale x 4 x float> %va, <vscal
 ; CHECK-LABEL: vfwadd_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vfwadd.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vfwadd.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %vc = fpext <vscale x 4 x float> %va to <vscale x 4 x double>
   %vd = fpext <vscale x 4 x float> %vb to <vscale x 4 x double>
@@ -183,8 +181,8 @@ define <vscale x 4 x double> @vfwadd_vf_nxv4f64(<vscale x 4 x float> %va, float
 ; CHECK-LABEL: vfwadd_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -198,8 +196,8 @@ define <vscale x 4 x double> @vfwadd_vf_nxv4f64_2(<vscale x 4 x float> %va, floa
 ; CHECK-LABEL: vfwadd_vf_nxv4f64_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
 ; CHECK-NEXT:    ret
   %fpext = fpext float %b to double
   %head = insertelement <vscale x 4 x double> poison, double %fpext, i32 0
@@ -250,9 +248,8 @@ define <vscale x 8 x double> @vfwadd_vv_nxv8f64(<vscale x 8 x float> %va, <vscal
 ; CHECK-LABEL: vfwadd_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vfwadd.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vfwadd.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %vc = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
   %vd = fpext <vscale x 8 x float> %vb to <vscale x 8 x double>
@@ -264,8 +261,8 @@ define <vscale x 8 x double> @vfwadd_vf_nxv8f64(<vscale x 8 x float> %va, float
 ; CHECK-LABEL: vfwadd_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwadd.vf v8, v16, fa0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -279,8 +276,8 @@ define <vscale x 8 x double> @vfwadd_vf_nxv8f64_2(<vscale x 8 x float> %va, floa
 ; CHECK-LABEL: vfwadd_vf_nxv8f64_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwadd.vf v8, v16, fa0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %fpext = fpext float %b to double
   %head = insertelement <vscale x 8 x double> poison, double %fpext, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.ll
index df8ece342d9ee..ad276b537d5b3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.ll
@@ -116,10 +116,9 @@ define <vscale x 4 x float> @intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16(<vscale
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.vv v8, v11, v10
+; CHECK-NEXT:    vfwadd.vv v8, v10, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -168,10 +167,9 @@ define <vscale x 8 x float> @intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16(<vscale
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vmv2r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.vv v8, v14, v12
+; CHECK-NEXT:    vfwadd.vv v8, v12, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -220,10 +218,9 @@ define <vscale x 16 x float> @intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16(<vs
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
+; CHECK-NEXT:    vmv4r.v v16, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.vv v8, v20, v16
+; CHECK-NEXT:    vfwadd.vv v8, v16, v12
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -323,10 +320,9 @@ define <vscale x 2 x double> @intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32(<vscal
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.vv v8, v11, v10
+; CHECK-NEXT:    vfwadd.vv v8, v10, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -375,10 +371,9 @@ define <vscale x 4 x double> @intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32(<vscal
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vmv2r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.vv v8, v14, v12
+; CHECK-NEXT:    vfwadd.vv v8, v12, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -427,10 +422,9 @@ define <vscale x 8 x double> @intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32(<vscal
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
+; CHECK-NEXT:    vmv4r.v v16, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.vv v8, v20, v16
+; CHECK-NEXT:    vfwadd.vv v8, v16, v12
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -581,9 +575,9 @@ define <vscale x 4 x float> @intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16(<vscale x 4
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -632,9 +626,9 @@ define <vscale x 8 x float> @intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16(<vscale x 8
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
+; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -683,9 +677,9 @@ define <vscale x 16 x float> @intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16(<vscale
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.vf v8, v16, fa0
+; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -785,9 +779,9 @@ define <vscale x 2 x double> @intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -836,9 +830,9 @@ define <vscale x 4 x double> @intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
+; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -887,9 +881,9 @@ define <vscale x 8 x double> @intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.vf v8, v16, fa0
+; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll
index 8e97ba581d55c..296c004642d12 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll
@@ -1290,9 +1290,9 @@ define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.wv v8, v10, v12
+; CHECK-NEXT:    vfwadd.wv v8, v10, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -1309,9 +1309,9 @@ define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.wv v8, v12, v16
+; CHECK-NEXT:    vfwadd.wv v8, v12, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -1347,9 +1347,9 @@ define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f3
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.wv v8, v10, v12
+; CHECK-NEXT:    vfwadd.wv v8, v10, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -1366,9 +1366,9 @@ define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f3
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.wv v8, v12, v16
+; CHECK-NEXT:    vfwadd.wv v8, v12, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -1385,9 +1385,9 @@ define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f3
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v24, v8
+; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwadd.wv v8, v16, v24
+; CHECK-NEXT:    vfwadd.wv v8, v16, v12
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll
index 0ac2d472c70fb..2bb2945d6c1e6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll
@@ -102,8 +102,8 @@ define <vscale x 4 x float> @intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16(<vscale x 4
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(
@@ -145,8 +145,8 @@ define <vscale x 8 x float> @intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16(<vscale x 8
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.f.v.nxv8f32.nxv8f16(
@@ -188,8 +188,8 @@ define <vscale x 16 x float> @intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.f.v.nxv16f32.nxv16f16(
@@ -274,8 +274,8 @@ define <vscale x 2 x double> @intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32(<vscale x 2
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.f.v.nxv2f64.nxv2f32(
@@ -317,8 +317,8 @@ define <vscale x 4 x double> @intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32(<vscale x 4
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.f.v.nxv4f64.nxv4f32(
@@ -360,8 +360,8 @@ define <vscale x 8 x double> @intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32(<vscale x 8
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.f.v.nxv8f64.nxv8f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll
index cb6db0de733a8..114abaa9d8da8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll
@@ -142,8 +142,8 @@ define <vscale x 8 x half> @intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfwcvt.f.x.v.nxv8f16.nxv8i8(
@@ -185,8 +185,8 @@ define <vscale x 16 x half> @intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfwcvt.f.x.v.nxv16f16.nxv16i8(
@@ -228,8 +228,8 @@ define <vscale x 32 x half> @intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8(<vscale x 3
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfwcvt.f.x.v.nxv32f16.nxv32i8(
@@ -357,8 +357,8 @@ define <vscale x 4 x float> @intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16(<vscale x 4
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.x.v.nxv4f32.nxv4i16(
@@ -400,8 +400,8 @@ define <vscale x 8 x float> @intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16(<vscale x 8
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.x.v.nxv8f32.nxv8i16(
@@ -443,8 +443,8 @@ define <vscale x 16 x float> @intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.x.v.nxv16f32.nxv16i16(
@@ -529,8 +529,8 @@ define <vscale x 2 x double> @intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32(<vscale x 2
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.x.v.nxv2f64.nxv2i32(
@@ -572,8 +572,8 @@ define <vscale x 4 x double> @intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32(<vscale x 4
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.x.v.nxv4f64.nxv4i32(
@@ -615,8 +615,8 @@ define <vscale x 8 x double> @intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32(<vscale x 8
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.x.v.nxv8f64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll
index 59dacdaa82ea3..9f81595c31caf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll
@@ -142,8 +142,8 @@ define <vscale x 8 x half> @intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv8f16.nxv8i8(
@@ -185,8 +185,8 @@ define <vscale x 16 x half> @intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv16f16.nxv16i8(
@@ -228,8 +228,8 @@ define <vscale x 32 x half> @intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv32f16.nxv32i8(
@@ -357,8 +357,8 @@ define <vscale x 4 x float> @intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16(<vscale x 4
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv4f32.nxv4i16(
@@ -400,8 +400,8 @@ define <vscale x 8 x float> @intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16(<vscale x 8
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv8f32.nxv8i16(
@@ -443,8 +443,8 @@ define <vscale x 16 x float> @intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16(<vscale
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv16f32.nxv16i16(
@@ -529,8 +529,8 @@ define <vscale x 2 x double> @intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv2f64.nxv2i32(
@@ -572,8 +572,8 @@ define <vscale x 4 x double> @intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv4f64.nxv4i32(
@@ -615,8 +615,8 @@ define <vscale x 8 x double> @intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv8f64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll
index 3d1fdda3af3ad..d4f62f9970364 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll
@@ -101,8 +101,8 @@ define <vscale x 4 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i32.nxv4f16(
@@ -145,8 +145,8 @@ define <vscale x 8 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i32.nxv8f16(
@@ -189,8 +189,8 @@ define <vscale x 16 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16(<vscale
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv16i32.nxv16f16(
@@ -277,8 +277,8 @@ define <vscale x 2 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i64.nxv2f32(
@@ -321,8 +321,8 @@ define <vscale x 4 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i64.nxv4f32(
@@ -365,8 +365,8 @@ define <vscale x 8 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i64.nxv8f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll
index 8eed12f2d2e72..e0489d18def7b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll
@@ -101,8 +101,8 @@ define <vscale x 4 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i32.nxv4f16(
@@ -145,8 +145,8 @@ define <vscale x 8 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i32.nxv8f16(
@@ -189,8 +189,8 @@ define <vscale x 16 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16(<vscal
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv16i32.nxv16f16(
@@ -277,8 +277,8 @@ define <vscale x 2 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i64.nxv2f32(
@@ -321,8 +321,8 @@ define <vscale x 4 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i64.nxv4f32(
@@ -365,8 +365,8 @@ define <vscale x 8 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i64.nxv8f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll
index da52e05a45f11..58d2dfdad16ea 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll
@@ -107,9 +107,9 @@ define <vscale x 4 x i32> @intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -154,9 +154,9 @@ define <vscale x 8 x i32> @intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -201,9 +201,9 @@ define <vscale x 16 x i32> @intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v12
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -295,9 +295,9 @@ define <vscale x 2 x i64> @intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -342,9 +342,9 @@ define <vscale x 4 x i64> @intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -389,9 +389,9 @@ define <vscale x 8 x i64> @intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.x.f.v v8, v12
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll
index 01a82998d617c..96c8043f4965c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll
@@ -107,9 +107,9 @@ define <vscale x 4 x i32> @intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.xu.f.v v8, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -154,9 +154,9 @@ define <vscale x 8 x i32> @intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.xu.f.v v8, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -201,9 +201,9 @@ define <vscale x 16 x i32> @intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16(<vscale x
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.xu.f.v v8, v12
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -295,9 +295,9 @@ define <vscale x 2 x i64> @intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v10
+; CHECK-NEXT:    vfwcvt.xu.f.v v8, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -342,9 +342,9 @@ define <vscale x 4 x i64> @intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v12
+; CHECK-NEXT:    vfwcvt.xu.f.v v8, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -389,9 +389,9 @@ define <vscale x 8 x i64> @intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v16
+; CHECK-NEXT:    vfwcvt.xu.f.v v8, v12
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll
index d232f83d07c10..fa25ba6edf764 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll
@@ -101,8 +101,8 @@ define <vscale x 4 x float> @intrinsic_vfwcvtbf16_f.f.v_nxv4f32_nxv4bf16(<vscale
 ; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv4f32_nxv4bf16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv4f32.nxv4bf16(
@@ -145,8 +145,8 @@ define <vscale x 8 x float> @intrinsic_vfwcvtbf16_f.f.v_nxv8f32_nxv8bf16(<vscale
 ; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv8f32_nxv8bf16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv8f32.nxv8bf16(
@@ -189,8 +189,8 @@ define <vscale x 16 x float> @intrinsic_vfwcvtbf16_f.f.v_nxv16f32_nxv16bf16(<vsc
 ; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv16f32_nxv16bf16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwcvtbf16.f.f.v.nxv16f32.nxv16bf16(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-sdnode.ll
index 63113b8780989..ebb400e6eeed1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-sdnode.ll
@@ -492,9 +492,9 @@ define <vscale x 4 x float> @vfwmacc_vv_nxv4f32(<vscale x 4 x float> %va, <vscal
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmacc.vv v8, v12, v14
+; ZVFHMIN-NEXT:    vfmacc.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    ret
   %vd = fpext <vscale x 4 x half> %vb to <vscale x 4 x float>
   %ve = fpext <vscale x 4 x half> %vc to <vscale x 4 x float>
@@ -536,9 +536,9 @@ define <vscale x 4 x float> @vfwnmacc_vv_nxv4f32(<vscale x 4 x float> %va, <vsca
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfnmacc.vv v8, v12, v14
+; ZVFHMIN-NEXT:    vfnmacc.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    ret
   %vd = fpext <vscale x 4 x half> %vb to <vscale x 4 x float>
   %ve = fpext <vscale x 4 x half> %vc to <vscale x 4 x float>
@@ -609,9 +609,9 @@ define <vscale x 4 x float> @vfwmsac_vv_nxv4f32(<vscale x 4 x float> %va, <vscal
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmsac.vv v8, v12, v14
+; ZVFHMIN-NEXT:    vfmsac.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    ret
   %vd = fpext <vscale x 4 x half> %vb to <vscale x 4 x float>
   %ve = fpext <vscale x 4 x half> %vc to <vscale x 4 x float>
@@ -655,9 +655,9 @@ define <vscale x 4 x float> @vfwnmsac_vv_nxv4f32(<vscale x 4 x float> %va, <vsca
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfnmsac.vv v8, v12, v14
+; ZVFHMIN-NEXT:    vfnmsac.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    ret
   %vd = fpext <vscale x 4 x half> %vb to <vscale x 4 x float>
   %ve = fpext <vscale x 4 x half> %vc to <vscale x 4 x float>
@@ -727,9 +727,9 @@ define <vscale x 8 x float> @vfwmacc_vv_nxv8f32(<vscale x 8 x float> %va, <vscal
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmacc.vv v8, v16, v20
+; ZVFHMIN-NEXT:    vfmacc.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    ret
   %vd = fpext <vscale x 8 x half> %vb to <vscale x 8 x float>
   %ve = fpext <vscale x 8 x half> %vc to <vscale x 8 x float>
@@ -771,9 +771,9 @@ define <vscale x 8 x float> @vfwnmacc_vv_nxv8f32(<vscale x 8 x float> %va, <vsca
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfnmacc.vv v8, v16, v20
+; ZVFHMIN-NEXT:    vfnmacc.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    ret
   %vd = fpext <vscale x 8 x half> %vb to <vscale x 8 x float>
   %ve = fpext <vscale x 8 x half> %vc to <vscale x 8 x float>
@@ -844,9 +844,9 @@ define <vscale x 8 x float> @vfwmsac_vv_nxv8f32(<vscale x 8 x float> %va, <vscal
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmsac.vv v8, v16, v20
+; ZVFHMIN-NEXT:    vfmsac.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    ret
   %vd = fpext <vscale x 8 x half> %vb to <vscale x 8 x float>
   %ve = fpext <vscale x 8 x half> %vc to <vscale x 8 x float>
@@ -890,9 +890,9 @@ define <vscale x 8 x float> @vfwnmsac_vv_nxv8f32(<vscale x 8 x float> %va, <vsca
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfnmsac.vv v8, v16, v20
+; ZVFHMIN-NEXT:    vfnmsac.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    ret
   %vd = fpext <vscale x 8 x half> %vb to <vscale x 8 x float>
   %ve = fpext <vscale x 8 x half> %vc to <vscale x 8 x float>
@@ -962,9 +962,9 @@ define <vscale x 16 x float> @vfwmacc_vv_nxv16f32(<vscale x 16 x float> %va, <vs
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmacc.vv v8, v24, v0
+; ZVFHMIN-NEXT:    vfmacc.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    ret
   %vd = fpext <vscale x 16 x half> %vb to <vscale x 16 x float>
   %ve = fpext <vscale x 16 x half> %vc to <vscale x 16 x float>
@@ -1006,9 +1006,9 @@ define <vscale x 16 x float> @vfwnmacc_vv_nxv16f32(<vscale x 16 x float> %va, <v
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfnmacc.vv v8, v24, v0
+; ZVFHMIN-NEXT:    vfnmacc.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    ret
   %vd = fpext <vscale x 16 x half> %vb to <vscale x 16 x float>
   %ve = fpext <vscale x 16 x half> %vc to <vscale x 16 x float>
@@ -1079,9 +1079,9 @@ define <vscale x 16 x float> @vfwmsac_vv_nxv16f32(<vscale x 16 x float> %va, <vs
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmsac.vv v8, v24, v0
+; ZVFHMIN-NEXT:    vfmsac.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    ret
   %vd = fpext <vscale x 16 x half> %vb to <vscale x 16 x float>
   %ve = fpext <vscale x 16 x half> %vc to <vscale x 16 x float>
@@ -1125,9 +1125,9 @@ define <vscale x 16 x float> @vfwnmsac_vv_nxv16f32(<vscale x 16 x float> %va, <v
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfnmsac.vv v8, v24, v0
+; ZVFHMIN-NEXT:    vfnmsac.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    ret
   %vd = fpext <vscale x 16 x half> %vb to <vscale x 16 x float>
   %ve = fpext <vscale x 16 x half> %vc to <vscale x 16 x float>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
index 4ef7ea5b52a75..03a02278e9d0c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
@@ -414,11 +414,10 @@ define <vscale x 4 x float> @vfmacc_vv_nxv4f32(<vscale x 4 x half> %a, <vscale x
 ; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v10, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
   %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
@@ -437,11 +436,10 @@ define <vscale x 4 x float> @vfmacc_vv_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
   %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
@@ -461,9 +459,9 @@ define <vscale x 4 x float> @vfmacc_vf_nxv4f32(<vscale x 4 x half> %va, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -487,9 +485,9 @@ define <vscale x 4 x float> @vfmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %va,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    ret
@@ -518,11 +516,10 @@ define <vscale x 8 x float> @vfmacc_vv_nxv8f32(<vscale x 8 x half> %a, <vscale x
 ; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv2r.v v16, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v20, v12, v0.t
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
   %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
@@ -541,11 +538,10 @@ define <vscale x 8 x float> @vfmacc_vv_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv2r.v v16, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v20, v12
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
   %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
@@ -565,9 +561,9 @@ define <vscale x 8 x float> @vfmacc_vf_nxv8f32(<vscale x 8 x half> %va, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -591,9 +587,9 @@ define <vscale x 8 x float> @vfmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %va,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    ret
@@ -622,9 +618,8 @@ define <vscale x 16 x float> @vfmacc_vv_nxv16f32(<vscale x 16 x half> %a, <vscal
 ; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v4, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -645,11 +640,10 @@ define <vscale x 16 x float> @vfmacc_vv_nxv16f32_unmasked(<vscale x 16 x half> %
 ; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v24, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v0, v16
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
   %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
@@ -669,9 +663,9 @@ define <vscale x 16 x float> @vfmacc_vf_nxv16f32(<vscale x 16 x half> %va, half
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -695,9 +689,9 @@ define <vscale x 16 x float> @vfmacc_vf_nxv16f32_unmasked(<vscale x 16 x half> %
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    ret
@@ -1032,9 +1026,9 @@ define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16(<vscale x 4 x half> %a,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v9, v0.t
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwmacc.vv v12, v10, v16, v0.t
+; CHECK-NEXT:    vfwmacc.vv v12, v10, v8, v0.t
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
   %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
@@ -1048,9 +1042,9 @@ define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16_unmasked(<vscale x 4 x h
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v9
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwmacc.vv v12, v10, v16
+; CHECK-NEXT:    vfwmacc.vv v12, v10, v8
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
   %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
@@ -1066,9 +1060,9 @@ define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16(<vscale x 8 x half> %a,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.f.v v24, v10, v0.t
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwmacc.vv v16, v12, v24, v0.t
+; CHECK-NEXT:    vfwmacc.vv v16, v12, v8, v0.t
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
   %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
@@ -1082,9 +1076,9 @@ define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16_unmasked(<vscale x 8 x h
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v24, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwmacc.vv v16, v12, v24
+; CHECK-NEXT:    vfwmacc.vv v16, v12, v8
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
   %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmaccbf16-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmaccbf16-sdnode.ll
index 6682aa1e17a30..49d70c070a925 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmaccbf16-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmaccbf16-sdnode.ll
@@ -107,9 +107,9 @@ define <vscale x 4 x float> @vfwmaccbf16_vv_nxv4f32(<vscale x 4 x float> %a, <vs
 ; ZVFBFMIN:       # %bb.0:
 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
-; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v14, v11
+; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v11
 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFMIN-NEXT:    vfmacc.vv v8, v12, v14
+; ZVFBFMIN-NEXT:    vfmacc.vv v8, v12, v10
 ; ZVFBFMIN-NEXT:    ret
   %b.ext = fpext <vscale x 4 x bfloat> %b to <vscale x 4 x float>
   %c.ext = fpext <vscale x 4 x bfloat> %c to <vscale x 4 x float>
@@ -153,9 +153,9 @@ define <vscale x 8 x float> @vfwmaccbf16_vv_nxv8f32(<vscale x 8 x float> %a, <vs
 ; ZVFBFMIN:       # %bb.0:
 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v20, v14
+; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v12, v14
 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFMIN-NEXT:    vfmacc.vv v8, v16, v20
+; ZVFBFMIN-NEXT:    vfmacc.vv v8, v16, v12
 ; ZVFBFMIN-NEXT:    ret
   %b.ext = fpext <vscale x 8 x bfloat> %b to <vscale x 8 x float>
   %c.ext = fpext <vscale x 8 x bfloat> %c to <vscale x 8 x float>
@@ -199,9 +199,9 @@ define <vscale x 16 x float> @vfwmaccbf16_vv_nxv16f32(<vscale x 16 x float> %a,
 ; ZVFBFMIN:       # %bb.0:
 ; ZVFBFMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
-; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v0, v20
+; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v20
 ; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFMIN-NEXT:    vfmacc.vv v8, v24, v0
+; ZVFBFMIN-NEXT:    vfmacc.vv v8, v24, v16
 ; ZVFBFMIN-NEXT:    ret
   %b.ext = fpext <vscale x 16 x bfloat> %b to <vscale x 16 x float>
   %c.ext = fpext <vscale x 16 x bfloat> %c to <vscale x 16 x float>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
index 36f8e99b27383..741b725d40801 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
@@ -338,11 +338,10 @@ define <vscale x 4 x float> @vmfsac_vv_nxv4f32(<vscale x 4 x half> %a, <vscale x
 ; ZVFHMIN-LABEL: vmfsac_vv_nxv4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmsub.vv v8, v14, v10, v0.t
+; ZVFHMIN-NEXT:    vfmsub.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
   %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
@@ -362,11 +361,10 @@ define <vscale x 4 x float> @vmfsac_vv_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vmfsac_vv_nxv4f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmsub.vv v8, v14, v10
+; ZVFHMIN-NEXT:    vfmsub.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
   %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
@@ -387,9 +385,9 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmsub.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -413,11 +411,11 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32_commute(<vscale x 4 x half> %a, h
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v8
+; ZVFHMIN-NEXT:    vmv1r.v v9, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v14, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
+; ZVFHMIN-NEXT:    vmv.v.x v13, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmsub.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -442,9 +440,9 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmsub.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    ret
@@ -472,11 +470,10 @@ define <vscale x 8 x float> @vmfsac_vv_nxv8f32(<vscale x 8 x half> %a, <vscale x
 ; ZVFHMIN-LABEL: vmfsac_vv_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv2r.v v16, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmsub.vv v8, v20, v12, v0.t
+; ZVFHMIN-NEXT:    vfmsub.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
   %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
@@ -496,11 +493,10 @@ define <vscale x 8 x float> @vmfsac_vv_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vmfsac_vv_nxv8f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv2r.v v16, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmsub.vv v8, v20, v12
+; ZVFHMIN-NEXT:    vfmsub.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
   %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
@@ -521,9 +517,9 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmsub.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -547,11 +543,11 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32_commute(<vscale x 8 x half> %a, h
 ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv2r.v v16, v8
+; ZVFHMIN-NEXT:    vmv2r.v v10, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v20, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    vmv.v.x v18, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmsub.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -576,9 +572,9 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfmsub.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll
index 41f2c26c15808..07dda1ec6d52f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll
@@ -51,9 +51,8 @@ define <vscale x 2 x double> @vfwmul_vv_nxv2f64(<vscale x 2 x float> %va, <vscal
 ; CHECK-LABEL: vfwmul_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vfwmul.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vfwmul.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %vc = fpext <vscale x 2 x float> %va to <vscale x 2 x double>
   %vd = fpext <vscale x 2 x float> %vb to <vscale x 2 x double>
@@ -65,8 +64,8 @@ define <vscale x 2 x double> @vfwmul_vf_nxv2f64(<vscale x 2 x float> %va, float
 ; CHECK-LABEL: vfwmul_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
@@ -80,8 +79,8 @@ define <vscale x 2 x double> @vfwmul_vf_nxv2f64_2(<vscale x 2 x float> %va, floa
 ; CHECK-LABEL: vfwmul_vf_nxv2f64_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
 ; CHECK-NEXT:    ret
   %fpext = fpext float %b to double
   %head = insertelement <vscale x 2 x double> poison, double %fpext, i32 0
@@ -95,9 +94,8 @@ define <vscale x 4 x double> @vfwmul_vv_nxv4f64(<vscale x 4 x float> %va, <vscal
 ; CHECK-LABEL: vfwmul_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vfwmul.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vfwmul.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %vc = fpext <vscale x 4 x float> %va to <vscale x 4 x double>
   %vd = fpext <vscale x 4 x float> %vb to <vscale x 4 x double>
@@ -109,8 +107,8 @@ define <vscale x 4 x double> @vfwmul_vf_nxv4f64(<vscale x 4 x float> %va, float
 ; CHECK-LABEL: vfwmul_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -124,8 +122,8 @@ define <vscale x 4 x double> @vfwmul_vf_nxv4f64_2(<vscale x 4 x float> %va, floa
 ; CHECK-LABEL: vfwmul_vf_nxv4f64_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
 ; CHECK-NEXT:    ret
   %fpext = fpext float %b to double
   %head = insertelement <vscale x 4 x double> poison, double %fpext, i32 0
@@ -139,9 +137,8 @@ define <vscale x 8 x double> @vfwmul_vv_nxv8f64(<vscale x 8 x float> %va, <vscal
 ; CHECK-LABEL: vfwmul_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vfwmul.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vfwmul.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %vc = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
   %vd = fpext <vscale x 8 x float> %vb to <vscale x 8 x double>
@@ -153,8 +150,8 @@ define <vscale x 8 x double> @vfwmul_vf_nxv8f64(<vscale x 8 x float> %va, float
 ; CHECK-LABEL: vfwmul_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwmul.vf v8, v16, fa0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -168,8 +165,8 @@ define <vscale x 8 x double> @vfwmul_vf_nxv8f64_2(<vscale x 8 x float> %va, floa
 ; CHECK-LABEL: vfwmul_vf_nxv8f64_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwmul.vf v8, v16, fa0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %fpext = fpext float %b to double
   %head = insertelement <vscale x 8 x double> poison, double %fpext, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul.ll
index d16c418cbf770..2b077e444e051 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul.ll
@@ -116,10 +116,9 @@ define <vscale x 4 x float> @intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16(<vscale
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwmul.vv v8, v11, v10
+; CHECK-NEXT:    vfwmul.vv v8, v10, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -168,10 +167,9 @@ define <vscale x 8 x float> @intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16(<vscale
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vmv2r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwmul.vv v8, v14, v12
+; CHECK-NEXT:    vfwmul.vv v8, v12, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -220,10 +218,9 @@ define <vscale x 16 x float> @intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16(<vs
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
+; CHECK-NEXT:    vmv4r.v v16, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwmul.vv v8, v20, v16
+; CHECK-NEXT:    vfwmul.vv v8, v16, v12
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -323,10 +320,9 @@ define <vscale x 2 x double> @intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32(<vscal
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwmul.vv v8, v11, v10
+; CHECK-NEXT:    vfwmul.vv v8, v10, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -375,10 +371,9 @@ define <vscale x 4 x double> @intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32(<vscal
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vmv2r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwmul.vv v8, v14, v12
+; CHECK-NEXT:    vfwmul.vv v8, v12, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -427,10 +422,9 @@ define <vscale x 8 x double> @intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32(<vscal
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
+; CHECK-NEXT:    vmv4r.v v16, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwmul.vv v8, v20, v16
+; CHECK-NEXT:    vfwmul.vv v8, v16, v12
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -581,9 +575,9 @@ define <vscale x 4 x float> @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16(<vscale x 4
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -632,9 +626,9 @@ define <vscale x 8 x float> @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16(<vscale x 8
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
+; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -683,9 +677,9 @@ define <vscale x 16 x float> @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16(<vscale
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwmul.vf v8, v16, fa0
+; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -785,9 +779,9 @@ define <vscale x 2 x double> @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -836,9 +830,9 @@ define <vscale x 4 x double> @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
+; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -887,9 +881,9 @@ define <vscale x 8 x double> @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwmul.vf v8, v16, fa0
+; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
index fa328356ab585..5bfd819c46265 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
@@ -299,11 +299,10 @@ define <vscale x 4 x float> @vfnmacc_vv_nxv4f32(<vscale x 4 x half> %a, <vscale
 ; ZVFHMIN-LABEL: vfnmacc_vv_nxv4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfnmadd.vv v8, v14, v10, v0.t
+; ZVFHMIN-NEXT:    vfnmadd.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
   %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
@@ -324,11 +323,10 @@ define <vscale x 4 x float> @vfnmacc_vv_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vv_nxv4f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfnmadd.vv v8, v14, v10
+; ZVFHMIN-NEXT:    vfnmadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
   %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
@@ -350,9 +348,9 @@ define <vscale x 4 x float> @vfnmacc_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfnmadd.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -378,11 +376,11 @@ define <vscale x 4 x float> @vfnmacc_vf_nxv4f32_commute(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv4f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v8
+; ZVFHMIN-NEXT:    vmv1r.v v9, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v14, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
+; ZVFHMIN-NEXT:    vmv.v.x v13, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfnmadd.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -408,9 +406,9 @@ define <vscale x 4 x float> @vfnmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfnmadd.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    ret
@@ -439,11 +437,10 @@ define <vscale x 8 x float> @vfnmacc_vv_nxv8f32(<vscale x 8 x half> %a, <vscale
 ; ZVFHMIN-LABEL: vfnmacc_vv_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv2r.v v16, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfnmadd.vv v8, v20, v12, v0.t
+; ZVFHMIN-NEXT:    vfnmadd.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
   %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
@@ -464,11 +461,10 @@ define <vscale x 8 x float> @vfnmacc_vv_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vv_nxv8f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv2r.v v16, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfnmadd.vv v8, v20, v12
+; ZVFHMIN-NEXT:    vfnmadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
   %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
@@ -490,9 +486,9 @@ define <vscale x 8 x float> @vfnmacc_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfnmadd.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -518,11 +514,11 @@ define <vscale x 8 x float> @vfnmacc_vf_nxv8f32_commute(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv8f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv2r.v v16, v8
+; ZVFHMIN-NEXT:    vmv2r.v v10, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v20, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    vmv.v.x v18, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfnmadd.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -548,9 +544,9 @@ define <vscale x 8 x float> @vfnmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfnmadd.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    ret
@@ -579,9 +575,8 @@ define <vscale x 16 x float> @vfnmacc_vv_nxv16f32(<vscale x 16 x half> %a, <vsca
 ; ZVFHMIN-LABEL: vfnmacc_vv_nxv16f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v4, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfnmadd.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -604,11 +599,10 @@ define <vscale x 16 x float> @vfnmacc_vv_nxv16f32_unmasked(<vscale x 16 x half>
 ; ZVFHMIN-LABEL: vfnmacc_vv_nxv16f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v24, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfnmadd.vv v8, v0, v16
+; ZVFHMIN-NEXT:    vfnmadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
   %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
@@ -630,9 +624,9 @@ define <vscale x 16 x float> @vfnmacc_vf_nxv16f32(<vscale x 16 x half> %a, half
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfnmadd.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -658,11 +652,11 @@ define <vscale x 16 x float> @vfnmacc_vf_nxv16f32_commute(<vscale x 16 x half> %
 ; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; ZVFHMIN-NEXT:    vmv4r.v v12, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v4, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4, v0.t
+; ZVFHMIN-NEXT:    vmv.v.x v28, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfnmadd.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -688,9 +682,9 @@ define <vscale x 16 x float> @vfnmacc_vf_nxv16f32_unmasked(<vscale x 16 x half>
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfnmadd.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    ret
@@ -1131,9 +1125,9 @@ define <vscale x 4 x double> @vfnmacc_vv_nxv4f64_nxv4f16(<vscale x 4 x half> %a,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v9, v0.t
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwnmacc.vv v12, v10, v16, v0.t
+; CHECK-NEXT:    vfwnmacc.vv v12, v10, v8, v0.t
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
   %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
@@ -1149,9 +1143,9 @@ define <vscale x 4 x double> @vfnmacc_vv_nxv4f64_nxv4f16_unmasked(<vscale x 4 x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v9
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfwnmacc.vv v12, v10, v16
+; CHECK-NEXT:    vfwnmacc.vv v12, v10, v8
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
   %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
@@ -1169,9 +1163,9 @@ define <vscale x 8 x double> @vfnmacc_vv_nxv8f64_nxv4f16(<vscale x 8 x half> %a,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.f.v v24, v10, v0.t
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwnmacc.vv v16, v12, v24, v0.t
+; CHECK-NEXT:    vfwnmacc.vv v16, v12, v8, v0.t
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
   %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
@@ -1187,9 +1181,9 @@ define <vscale x 8 x double> @vfnmacc_vv_nxv8f64_nxv4f16_unmasked(<vscale x 8 x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.f.v v24, v10
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfwnmacc.vv v16, v12, v24
+; CHECK-NEXT:    vfwnmacc.vv v16, v12, v8
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
   %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll
index cc0ae35780a60..13084b05e16df 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll
@@ -287,11 +287,10 @@ define <vscale x 4 x float> @vfnmsac_vv_nxv4f32(<vscale x 4 x half> %a, <vscale
 ; ZVFHMIN-LABEL: vfnmsac_vv_nxv4f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfnmsub.vv v8, v14, v10, v0.t
+; ZVFHMIN-NEXT:    vfnmsub.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
   %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
@@ -311,11 +310,10 @@ define <vscale x 4 x float> @vfnmsac_vv_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vv_nxv4f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfnmsub.vv v8, v14, v10
+; ZVFHMIN-NEXT:    vfnmsub.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
   %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
@@ -336,9 +334,9 @@ define <vscale x 4 x float> @vfnmsac_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfnmsub.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -362,11 +360,11 @@ define <vscale x 4 x float> @vfnmsac_vf_nxv4f32_commute(<vscale x 4 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv4f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv1r.v v12, v8
+; ZVFHMIN-NEXT:    vmv1r.v v9, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v14, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v14, v0.t
+; ZVFHMIN-NEXT:    vmv.v.x v13, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v13, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfnmsub.vv v8, v12, v10, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -391,9 +389,9 @@ define <vscale x 4 x float> @vfnmsac_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v14, a1
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfnmsub.vv v8, v12, v10
 ; ZVFHMIN-NEXT:    ret
@@ -421,11 +419,10 @@ define <vscale x 8 x float> @vfnmsac_vv_nxv8f32(<vscale x 8 x half> %a, <vscale
 ; ZVFHMIN-LABEL: vfnmsac_vv_nxv8f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv2r.v v16, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfnmsub.vv v8, v20, v12, v0.t
+; ZVFHMIN-NEXT:    vfnmsub.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
   %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
@@ -445,11 +442,10 @@ define <vscale x 8 x float> @vfnmsac_vv_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vv_nxv8f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv2r.v v16, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfnmsub.vv v8, v20, v12
+; ZVFHMIN-NEXT:    vfnmsub.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
   %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
@@ -470,9 +466,9 @@ define <vscale x 8 x float> @vfnmsac_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfnmsub.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -496,11 +492,11 @@ define <vscale x 8 x float> @vfnmsac_vf_nxv8f32_commute(<vscale x 8 x half> %a,
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv8f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv2r.v v16, v8
+; ZVFHMIN-NEXT:    vmv2r.v v10, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v20, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v20, v0.t
+; ZVFHMIN-NEXT:    vmv.v.x v18, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v18, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfnmsub.vv v8, v16, v12, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -525,9 +521,9 @@ define <vscale x 8 x float> @vfnmsac_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v20, a1
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfnmsub.vv v8, v16, v12
 ; ZVFHMIN-NEXT:    ret
@@ -555,9 +551,8 @@ define <vscale x 16 x float> @vfnmsac_vv_nxv16f32(<vscale x 16 x half> %a, <vsca
 ; ZVFHMIN-LABEL: vfnmsac_vv_nxv16f32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v4, v12
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfnmsub.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -579,11 +574,10 @@ define <vscale x 16 x float> @vfnmsac_vv_nxv16f32_unmasked(<vscale x 16 x half>
 ; ZVFHMIN-LABEL: vfnmsac_vv_nxv16f32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v24, v12
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfnmsub.vv v8, v0, v16
+; ZVFHMIN-NEXT:    vfnmsub.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    ret
   %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
   %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
@@ -604,9 +598,9 @@ define <vscale x 16 x float> @vfnmsac_vf_nxv16f32(<vscale x 16 x half> %a, half
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfnmsub.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -630,11 +624,11 @@ define <vscale x 16 x float> @vfnmsac_vf_nxv16f32_commute(<vscale x 16 x half> %
 ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32_commute:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; ZVFHMIN-NEXT:    vmv4r.v v12, v8
 ; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v4, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v4, v0.t
+; ZVFHMIN-NEXT:    vmv.v.x v28, a0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v28, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfnmsub.vv v8, v24, v16, v0.t
 ; ZVFHMIN-NEXT:    ret
@@ -659,9 +653,9 @@ define <vscale x 16 x float> @vfnmsac_vf_nxv16f32_unmasked(<vscale x 16 x half>
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v4, a1
+; ZVFHMIN-NEXT:    vmv.v.x v12, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT:    vfnmsub.vv v8, v24, v16
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll
index 9393cbd4ccafc..b5e747a470967 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll
@@ -88,9 +88,8 @@ define <vscale x 2 x double> @vfwsub_vv_nxv2f64(<vscale x 2 x float> %va, <vscal
 ; CHECK-LABEL: vfwsub_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vfwsub.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vfwsub.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %vc = fpext <vscale x 2 x float> %va to <vscale x 2 x double>
   %vd = fpext <vscale x 2 x float> %vb to <vscale x 2 x double>
@@ -102,8 +101,8 @@ define <vscale x 2 x double> @vfwsub_vf_nxv2f64(<vscale x 2 x float> %va, float
 ; CHECK-LABEL: vfwsub_vf_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
@@ -117,8 +116,8 @@ define <vscale x 2 x double> @vfwsub_vf_nxv2f64_2(<vscale x 2 x float> %va, floa
 ; CHECK-LABEL: vfwsub_vf_nxv2f64_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0
 ; CHECK-NEXT:    ret
   %fpext = fpext float %b to double
   %head = insertelement <vscale x 2 x double> poison, double %fpext, i32 0
@@ -169,9 +168,8 @@ define <vscale x 4 x double> @vfwsub_vv_nxv4f64(<vscale x 4 x float> %va, <vscal
 ; CHECK-LABEL: vfwsub_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vfwsub.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vfwsub.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %vc = fpext <vscale x 4 x float> %va to <vscale x 4 x double>
   %vd = fpext <vscale x 4 x float> %vb to <vscale x 4 x double>
@@ -183,8 +181,8 @@ define <vscale x 4 x double> @vfwsub_vf_nxv4f64(<vscale x 4 x float> %va, float
 ; CHECK-LABEL: vfwsub_vf_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -198,8 +196,8 @@ define <vscale x 4 x double> @vfwsub_vf_nxv4f64_2(<vscale x 4 x float> %va, floa
 ; CHECK-LABEL: vfwsub_vf_nxv4f64_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
 ; CHECK-NEXT:    ret
   %fpext = fpext float %b to double
   %head = insertelement <vscale x 4 x double> poison, double %fpext, i32 0
@@ -250,9 +248,8 @@ define <vscale x 8 x double> @vfwsub_vv_nxv8f64(<vscale x 8 x float> %va, <vscal
 ; CHECK-LABEL: vfwsub_vv_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vfwsub.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vfwsub.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %vc = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
   %vd = fpext <vscale x 8 x float> %vb to <vscale x 8 x double>
@@ -264,8 +261,8 @@ define <vscale x 8 x double> @vfwsub_vf_nxv8f64(<vscale x 8 x float> %va, float
 ; CHECK-LABEL: vfwsub_vf_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwsub.vf v8, v16, fa0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x float> poison, float %b, i32 0
   %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -279,8 +276,8 @@ define <vscale x 8 x double> @vfwsub_vf_nxv8f64_2(<vscale x 8 x float> %va, floa
 ; CHECK-LABEL: vfwsub_vf_nxv8f64_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwsub.vf v8, v16, fa0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
 ; CHECK-NEXT:    ret
   %fpext = fpext float %b to double
   %head = insertelement <vscale x 8 x double> poison, double %fpext, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.ll
index 703a69d048311..b31541126a7f2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.ll
@@ -116,10 +116,9 @@ define <vscale x 4 x float> @intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16(<vscale
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.vv v8, v11, v10
+; CHECK-NEXT:    vfwsub.vv v8, v10, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -168,10 +167,9 @@ define <vscale x 8 x float> @intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16(<vscale
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vmv2r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.vv v8, v14, v12
+; CHECK-NEXT:    vfwsub.vv v8, v12, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -220,10 +218,9 @@ define <vscale x 16 x float> @intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16(<vs
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
+; CHECK-NEXT:    vmv4r.v v16, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.vv v8, v20, v16
+; CHECK-NEXT:    vfwsub.vv v8, v16, v12
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -323,10 +320,9 @@ define <vscale x 2 x double> @intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32(<vscal
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.vv v8, v11, v10
+; CHECK-NEXT:    vfwsub.vv v8, v10, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -375,10 +371,9 @@ define <vscale x 4 x double> @intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32(<vscal
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vmv2r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.vv v8, v14, v12
+; CHECK-NEXT:    vfwsub.vv v8, v12, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -427,10 +422,9 @@ define <vscale x 8 x double> @intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32(<vscal
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
+; CHECK-NEXT:    vmv4r.v v16, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.vv v8, v20, v16
+; CHECK-NEXT:    vfwsub.vv v8, v16, v12
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -581,9 +575,9 @@ define <vscale x 4 x float> @intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16(<vscale x 4
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -632,9 +626,9 @@ define <vscale x 8 x float> @intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16(<vscale x 8
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
+; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -683,9 +677,9 @@ define <vscale x 16 x float> @intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16(<vscale
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.vf v8, v16, fa0
+; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -785,9 +779,9 @@ define <vscale x 2 x double> @intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -836,9 +830,9 @@ define <vscale x 4 x double> @intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
+; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -887,9 +881,9 @@ define <vscale x 8 x double> @intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32(<vscale x
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.vf v8, v16, fa0
+; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll
index ab45345a7a39d..7a6640ed89a07 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll
@@ -1290,9 +1290,9 @@ define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.wv v8, v10, v12
+; CHECK-NEXT:    vfwsub.wv v8, v10, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -1309,9 +1309,9 @@ define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.wv v8, v12, v16
+; CHECK-NEXT:    vfwsub.wv v8, v12, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -1347,9 +1347,9 @@ define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f3
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.wv v8, v10, v12
+; CHECK-NEXT:    vfwsub.wv v8, v10, v9
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -1366,9 +1366,9 @@ define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f3
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
+; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.wv v8, v12, v16
+; CHECK-NEXT:    vfwsub.wv v8, v12, v10
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
@@ -1385,9 +1385,9 @@ define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f3
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v24, v8
+; CHECK-NEXT:    vmv4r.v v12, v8
 ; CHECK-NEXT:    fsrmi a0, 0
-; CHECK-NEXT:    vfwsub.wv v8, v16, v24
+; CHECK-NEXT:    vfwsub.wv v8, v16, v12
 ; CHECK-NEXT:    fsrm a0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vitofp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vitofp-constrained-sdnode.ll
index 599a66d191fd2..2dae21a01a66f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vitofp-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vitofp-constrained-sdnode.ll
@@ -140,8 +140,8 @@ define <vscale x 2 x double> @vsitofp_nxv2i1_nxv2f64(<vscale x 2 x i1> %va) stri
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x double> @llvm.experimental.constrained.sitofp.nxv2f64.nxv2i1(<vscale x 2 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %evec
@@ -153,8 +153,8 @@ define <vscale x 2 x double> @vuitofp_nxv2i1_nxv2f64(<vscale x 2 x i1> %va) stri
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x double> @llvm.experimental.constrained.uitofp.nxv2f64.nxv2i1(<vscale x 2 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %evec
@@ -192,8 +192,8 @@ define <vscale x 4 x float> @vsitofp_nxv4i1_nxv4f32(<vscale x 4 x i1> %va) stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x float> @llvm.experimental.constrained.sitofp.nxv4f32.nxv4i1(<vscale x 4 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x float> %evec
@@ -205,8 +205,8 @@ define <vscale x 4 x float> @vuitofp_nxv4i1_nxv4f32(<vscale x 4 x i1> %va) stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x float> @llvm.experimental.constrained.uitofp.nxv4f32.nxv4i1(<vscale x 4 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x float> %evec
@@ -218,8 +218,8 @@ define <vscale x 4 x double> @vsitofp_nxv4i1_nxv4f64(<vscale x 4 x i1> %va) stri
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x double> @llvm.experimental.constrained.sitofp.nxv4f64.nxv4i1(<vscale x 4 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %evec
@@ -231,8 +231,8 @@ define <vscale x 4 x double> @vuitofp_nxv4i1_nxv4f64(<vscale x 4 x i1> %va) stri
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x double> @llvm.experimental.constrained.uitofp.nxv4f64.nxv4i1(<vscale x 4 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %evec
@@ -244,8 +244,8 @@ define <vscale x 8 x half> @vsitofp_nxv8i1_nxv8f16(<vscale x 8 x i1> %va) strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x half> @llvm.experimental.constrained.sitofp.nxv8f16.nxv8i1(<vscale x 8 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x half> %evec
@@ -257,8 +257,8 @@ define <vscale x 8 x half> @vuitofp_nxv8i1_nxv8f16(<vscale x 8 x i1> %va) strict
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x half> @llvm.experimental.constrained.uitofp.nxv8f16.nxv8i1(<vscale x 8 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x half> %evec
@@ -270,8 +270,8 @@ define <vscale x 8 x float> @vsitofp_nxv8i1_nxv8f32(<vscale x 8 x i1> %va) stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x float> @llvm.experimental.constrained.sitofp.nxv8f32.nxv8i1(<vscale x 8 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x float> %evec
@@ -283,8 +283,8 @@ define <vscale x 8 x float> @vuitofp_nxv8i1_nxv8f32(<vscale x 8 x i1> %va) stric
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x float> @llvm.experimental.constrained.uitofp.nxv8f32.nxv8i1(<vscale x 8 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x float> %evec
@@ -296,8 +296,8 @@ define <vscale x 8 x double> @vsitofp_nxv8i1_nxv8f64(<vscale x 8 x i1> %va) stri
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v16, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x double> @llvm.experimental.constrained.sitofp.nxv8f64.nxv8i1(<vscale x 8 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %evec
@@ -309,8 +309,8 @@ define <vscale x 8 x double> @vuitofp_nxv8i1_nxv8f64(<vscale x 8 x i1> %va) stri
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v16, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x double> @llvm.experimental.constrained.uitofp.nxv8f64.nxv8i1(<vscale x 8 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %evec
@@ -322,8 +322,8 @@ define <vscale x 16 x half> @vsitofp_nxv16i1_nxv16f16(<vscale x 16 x i1> %va) st
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x half> @llvm.experimental.constrained.sitofp.nxv16f16.nxv16i1(<vscale x 16 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x half> %evec
@@ -335,8 +335,8 @@ define <vscale x 16 x half> @vuitofp_nxv16i1_nxv16f16(<vscale x 16 x i1> %va) st
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x half> @llvm.experimental.constrained.uitofp.nxv16f16.nxv16i1(<vscale x 16 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x half> %evec
@@ -348,8 +348,8 @@ define <vscale x 16 x float> @vsitofp_nxv16i1_nxv16f32(<vscale x 16 x i1> %va) s
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v16, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x float> @llvm.experimental.constrained.sitofp.nxv16f32.nxv16i1(<vscale x 16 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x float> %evec
@@ -361,8 +361,8 @@ define <vscale x 16 x float> @vuitofp_nxv16i1_nxv16f32(<vscale x 16 x i1> %va) s
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v16, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x float> @llvm.experimental.constrained.uitofp.nxv16f32.nxv16i1(<vscale x 16 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x float> %evec
@@ -374,8 +374,8 @@ define <vscale x 32 x half> @vsitofp_nxv32i1_nxv32f16(<vscale x 32 x i1> %va) st
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v16, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 32 x half> @llvm.experimental.constrained.sitofp.nxv32f16.nxv32i1(<vscale x 32 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 32 x half> %evec
@@ -387,8 +387,8 @@ define <vscale x 32 x half> @vuitofp_nxv32i1_nxv32f16(<vscale x 32 x i1> %va) st
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v16, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 32 x half> @llvm.experimental.constrained.uitofp.nxv32f16.nxv32i1(<vscale x 32 x i1> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 32 x half> %evec
@@ -545,8 +545,8 @@ define <vscale x 2 x double> @vsitofp_nxv2i8_nxv2f64(<vscale x 2 x i8> %va) stri
 ; CHECK-LABEL: vsitofp_nxv2i8_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf4 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x double> @llvm.experimental.constrained.sitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %evec
@@ -557,8 +557,8 @@ define <vscale x 2 x double> @vuitofp_nxv2i8_nxv2f64(<vscale x 2 x i8> %va) stri
 ; CHECK-LABEL: vuitofp_nxv2i8_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf4 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x double> @llvm.experimental.constrained.uitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %evec
@@ -593,8 +593,8 @@ define <vscale x 4 x float> @vsitofp_nxv4i8_nxv4f32(<vscale x 4 x i8> %va) stric
 ; CHECK-LABEL: vsitofp_nxv4i8_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x float> @llvm.experimental.constrained.sitofp.nxv4f32.nxv4i8(<vscale x 4 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x float> %evec
@@ -605,8 +605,8 @@ define <vscale x 4 x float> @vuitofp_nxv4i8_nxv4f32(<vscale x 4 x i8> %va) stric
 ; CHECK-LABEL: vuitofp_nxv4i8_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x float> @llvm.experimental.constrained.uitofp.nxv4f32.nxv4i8(<vscale x 4 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x float> %evec
@@ -617,8 +617,8 @@ define <vscale x 4 x double> @vsitofp_nxv4i8_nxv4f64(<vscale x 4 x i8> %va) stri
 ; CHECK-LABEL: vsitofp_nxv4i8_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x double> @llvm.experimental.constrained.sitofp.nxv4f64.nxv4i8(<vscale x 4 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %evec
@@ -629,8 +629,8 @@ define <vscale x 4 x double> @vuitofp_nxv4i8_nxv4f64(<vscale x 4 x i8> %va) stri
 ; CHECK-LABEL: vuitofp_nxv4i8_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x double> @llvm.experimental.constrained.uitofp.nxv4f64.nxv4i8(<vscale x 4 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %evec
@@ -641,8 +641,8 @@ define <vscale x 8 x half> @vsitofp_nxv8i8_nxv8f16(<vscale x 8 x i8> %va) strict
 ; CHECK-LABEL: vsitofp_nxv8i8_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x half> @llvm.experimental.constrained.sitofp.nxv8f16.nxv8i8(<vscale x 8 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x half> %evec
@@ -653,8 +653,8 @@ define <vscale x 8 x half> @vuitofp_nxv8i8_nxv8f16(<vscale x 8 x i8> %va) strict
 ; CHECK-LABEL: vuitofp_nxv8i8_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x half> @llvm.experimental.constrained.uitofp.nxv8f16.nxv8i8(<vscale x 8 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x half> %evec
@@ -665,8 +665,8 @@ define <vscale x 8 x float> @vsitofp_nxv8i8_nxv8f32(<vscale x 8 x i8> %va) stric
 ; CHECK-LABEL: vsitofp_nxv8i8_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x float> @llvm.experimental.constrained.sitofp.nxv8f32.nxv8i8(<vscale x 8 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x float> %evec
@@ -677,8 +677,8 @@ define <vscale x 8 x float> @vuitofp_nxv8i8_nxv8f32(<vscale x 8 x i8> %va) stric
 ; CHECK-LABEL: vuitofp_nxv8i8_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x float> @llvm.experimental.constrained.uitofp.nxv8f32.nxv8i8(<vscale x 8 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x float> %evec
@@ -689,8 +689,8 @@ define <vscale x 8 x double> @vsitofp_nxv8i8_nxv8f64(<vscale x 8 x i8> %va) stri
 ; CHECK-LABEL: vsitofp_nxv8i8_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x double> @llvm.experimental.constrained.sitofp.nxv8f64.nxv8i8(<vscale x 8 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %evec
@@ -701,8 +701,8 @@ define <vscale x 8 x double> @vuitofp_nxv8i8_nxv8f64(<vscale x 8 x i8> %va) stri
 ; CHECK-LABEL: vuitofp_nxv8i8_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vzext.vf4 v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x double> @llvm.experimental.constrained.uitofp.nxv8f64.nxv8i8(<vscale x 8 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %evec
@@ -713,8 +713,8 @@ define <vscale x 16 x half> @vsitofp_nxv16i8_nxv16f16(<vscale x 16 x i8> %va) st
 ; CHECK-LABEL: vsitofp_nxv16i8_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x half> @llvm.experimental.constrained.sitofp.nxv16f16.nxv16i8(<vscale x 16 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x half> %evec
@@ -725,8 +725,8 @@ define <vscale x 16 x half> @vuitofp_nxv16i8_nxv16f16(<vscale x 16 x i8> %va) st
 ; CHECK-LABEL: vuitofp_nxv16i8_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x half> @llvm.experimental.constrained.uitofp.nxv16f16.nxv16i8(<vscale x 16 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x half> %evec
@@ -737,8 +737,8 @@ define <vscale x 16 x float> @vsitofp_nxv16i8_nxv16f32(<vscale x 16 x i8> %va) s
 ; CHECK-LABEL: vsitofp_nxv16i8_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x float> @llvm.experimental.constrained.sitofp.nxv16f32.nxv16i8(<vscale x 16 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x float> %evec
@@ -749,8 +749,8 @@ define <vscale x 16 x float> @vuitofp_nxv16i8_nxv16f32(<vscale x 16 x i8> %va) s
 ; CHECK-LABEL: vuitofp_nxv16i8_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x float> @llvm.experimental.constrained.uitofp.nxv16f32.nxv16i8(<vscale x 16 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x float> %evec
@@ -761,8 +761,8 @@ define <vscale x 32 x half> @vsitofp_nxv32i8_nxv32f16(<vscale x 32 x i8> %va) st
 ; CHECK-LABEL: vsitofp_nxv32i8_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 32 x half> @llvm.experimental.constrained.sitofp.nxv32f16.nxv32i8(<vscale x 32 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 32 x half> %evec
@@ -773,8 +773,8 @@ define <vscale x 32 x half> @vuitofp_nxv32i8_nxv32f16(<vscale x 32 x i8> %va) st
 ; CHECK-LABEL: vuitofp_nxv32i8_nxv32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 32 x half> @llvm.experimental.constrained.uitofp.nxv32f16.nxv32i8(<vscale x 32 x i8> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 32 x half> %evec
@@ -901,8 +901,8 @@ define <vscale x 2 x double> @vsitofp_nxv2i16_nxv2f64(<vscale x 2 x i16> %va) st
 ; CHECK-LABEL: vsitofp_nxv2i16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x double> @llvm.experimental.constrained.sitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %evec
@@ -913,8 +913,8 @@ define <vscale x 2 x double> @vuitofp_nxv2i16_nxv2f64(<vscale x 2 x i16> %va) st
 ; CHECK-LABEL: vuitofp_nxv2i16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x double> @llvm.experimental.constrained.uitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %evec
@@ -947,8 +947,8 @@ define <vscale x 4 x float> @vsitofp_nxv4i16_nxv4f32(<vscale x 4 x i16> %va) str
 ; CHECK-LABEL: vsitofp_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x float> @llvm.experimental.constrained.sitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x float> %evec
@@ -959,8 +959,8 @@ define <vscale x 4 x float> @vuitofp_nxv4i16_nxv4f32(<vscale x 4 x i16> %va) str
 ; CHECK-LABEL: vuitofp_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x float> @llvm.experimental.constrained.uitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x float> %evec
@@ -971,8 +971,8 @@ define <vscale x 4 x double> @vsitofp_nxv4i16_nxv4f64(<vscale x 4 x i16> %va) st
 ; CHECK-LABEL: vsitofp_nxv4i16_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x double> @llvm.experimental.constrained.sitofp.nxv4f64.nxv4i16(<vscale x 4 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %evec
@@ -983,8 +983,8 @@ define <vscale x 4 x double> @vuitofp_nxv4i16_nxv4f64(<vscale x 4 x i16> %va) st
 ; CHECK-LABEL: vuitofp_nxv4i16_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x double> @llvm.experimental.constrained.uitofp.nxv4f64.nxv4i16(<vscale x 4 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %evec
@@ -1017,8 +1017,8 @@ define <vscale x 8 x float> @vsitofp_nxv8i16_nxv8f32(<vscale x 8 x i16> %va) str
 ; CHECK-LABEL: vsitofp_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x float> @llvm.experimental.constrained.sitofp.nxv8f32.nxv8i16(<vscale x 8 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x float> %evec
@@ -1029,8 +1029,8 @@ define <vscale x 8 x float> @vuitofp_nxv8i16_nxv8f32(<vscale x 8 x i16> %va) str
 ; CHECK-LABEL: vuitofp_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x float> @llvm.experimental.constrained.uitofp.nxv8f32.nxv8i16(<vscale x 8 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x float> %evec
@@ -1041,8 +1041,8 @@ define <vscale x 8 x double> @vsitofp_nxv8i16_nxv8f64(<vscale x 8 x i16> %va) st
 ; CHECK-LABEL: vsitofp_nxv8i16_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x double> @llvm.experimental.constrained.sitofp.nxv8f64.nxv8i16(<vscale x 8 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %evec
@@ -1053,8 +1053,8 @@ define <vscale x 8 x double> @vuitofp_nxv8i16_nxv8f64(<vscale x 8 x i16> %va) st
 ; CHECK-LABEL: vuitofp_nxv8i16_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x double> @llvm.experimental.constrained.uitofp.nxv8f64.nxv8i16(<vscale x 8 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %evec
@@ -1087,8 +1087,8 @@ define <vscale x 16 x float> @vsitofp_nxv16i16_nxv16f32(<vscale x 16 x i16> %va)
 ; CHECK-LABEL: vsitofp_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x float> @llvm.experimental.constrained.sitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x float> %evec
@@ -1099,8 +1099,8 @@ define <vscale x 16 x float> @vuitofp_nxv16i16_nxv16f32(<vscale x 16 x i16> %va)
 ; CHECK-LABEL: vuitofp_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x float> @llvm.experimental.constrained.uitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x float> %evec
@@ -1133,8 +1133,7 @@ define <vscale x 1 x half> @vsitofp_nxv1i32_nxv1f16(<vscale x 1 x i32> %va) stri
 ; CHECK-LABEL: vsitofp_nxv1i32_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x half> @llvm.experimental.constrained.sitofp.nxv1f16.nxv1i32(<vscale x 1 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x half> %evec
@@ -1145,8 +1144,7 @@ define <vscale x 1 x half> @vuitofp_nxv1i32_nxv1f16(<vscale x 1 x i32> %va) stri
 ; CHECK-LABEL: vuitofp_nxv1i32_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x half> @llvm.experimental.constrained.uitofp.nxv1f16.nxv1i32(<vscale x 1 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x half> %evec
@@ -1203,8 +1201,7 @@ define <vscale x 2 x half> @vsitofp_nxv2i32_nxv2f16(<vscale x 2 x i32> %va) stri
 ; CHECK-LABEL: vsitofp_nxv2i32_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x half> @llvm.experimental.constrained.sitofp.nxv2f16.nxv2i32(<vscale x 2 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x half> %evec
@@ -1215,8 +1212,7 @@ define <vscale x 2 x half> @vuitofp_nxv2i32_nxv2f16(<vscale x 2 x i32> %va) stri
 ; CHECK-LABEL: vuitofp_nxv2i32_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x half> @llvm.experimental.constrained.uitofp.nxv2f16.nxv2i32(<vscale x 2 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x half> %evec
@@ -1249,8 +1245,8 @@ define <vscale x 2 x double> @vsitofp_nxv2i32_nxv2f64(<vscale x 2 x i32> %va) st
 ; CHECK-LABEL: vsitofp_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x double> @llvm.experimental.constrained.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %evec
@@ -1261,8 +1257,8 @@ define <vscale x 2 x double> @vuitofp_nxv2i32_nxv2f64(<vscale x 2 x i32> %va) st
 ; CHECK-LABEL: vuitofp_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x double> @llvm.experimental.constrained.uitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x double> %evec
@@ -1273,8 +1269,7 @@ define <vscale x 4 x half> @vsitofp_nxv4i32_nxv4f16(<vscale x 4 x i32> %va) stri
 ; CHECK-LABEL: vsitofp_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x half> @llvm.experimental.constrained.sitofp.nxv4f16.nxv4i32(<vscale x 4 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x half> %evec
@@ -1285,8 +1280,7 @@ define <vscale x 4 x half> @vuitofp_nxv4i32_nxv4f16(<vscale x 4 x i32> %va) stri
 ; CHECK-LABEL: vuitofp_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x half> @llvm.experimental.constrained.uitofp.nxv4f16.nxv4i32(<vscale x 4 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x half> %evec
@@ -1319,8 +1313,8 @@ define <vscale x 4 x double> @vsitofp_nxv4i32_nxv4f64(<vscale x 4 x i32> %va) st
 ; CHECK-LABEL: vsitofp_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x double> @llvm.experimental.constrained.sitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %evec
@@ -1331,8 +1325,8 @@ define <vscale x 4 x double> @vuitofp_nxv4i32_nxv4f64(<vscale x 4 x i32> %va) st
 ; CHECK-LABEL: vuitofp_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x double> @llvm.experimental.constrained.uitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x double> %evec
@@ -1343,8 +1337,7 @@ define <vscale x 8 x half> @vsitofp_nxv8i32_nxv8f16(<vscale x 8 x i32> %va) stri
 ; CHECK-LABEL: vsitofp_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x half> @llvm.experimental.constrained.sitofp.nxv8f16.nxv8i32(<vscale x 8 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x half> %evec
@@ -1355,8 +1348,7 @@ define <vscale x 8 x half> @vuitofp_nxv8i32_nxv8f16(<vscale x 8 x i32> %va) stri
 ; CHECK-LABEL: vuitofp_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x half> @llvm.experimental.constrained.uitofp.nxv8f16.nxv8i32(<vscale x 8 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x half> %evec
@@ -1389,8 +1381,8 @@ define <vscale x 8 x double> @vsitofp_nxv8i32_nxv8f64(<vscale x 8 x i32> %va) st
 ; CHECK-LABEL: vsitofp_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x double> @llvm.experimental.constrained.sitofp.nxv8f64.nxv8i32(<vscale x 8 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %evec
@@ -1401,8 +1393,8 @@ define <vscale x 8 x double> @vuitofp_nxv8i32_nxv8f64(<vscale x 8 x i32> %va) st
 ; CHECK-LABEL: vuitofp_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x double> @llvm.experimental.constrained.uitofp.nxv8f64.nxv8i32(<vscale x 8 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x double> %evec
@@ -1413,8 +1405,7 @@ define <vscale x 16 x half> @vsitofp_nxv16i32_nxv16f16(<vscale x 16 x i32> %va)
 ; CHECK-LABEL: vsitofp_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x half> @llvm.experimental.constrained.sitofp.nxv16f16.nxv16i32(<vscale x 16 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x half> %evec
@@ -1425,8 +1416,7 @@ define <vscale x 16 x half> @vuitofp_nxv16i32_nxv16f16(<vscale x 16 x i32> %va)
 ; CHECK-LABEL: vuitofp_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 16 x half> @llvm.experimental.constrained.uitofp.nxv16f16.nxv16i32(<vscale x 16 x i32> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 16 x half> %evec
@@ -1459,9 +1449,9 @@ define <vscale x 1 x half> @vsitofp_nxv1i64_nxv1f16(<vscale x 1 x i64> %va) stri
 ; CHECK-LABEL: vsitofp_nxv1i64_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x half> @llvm.experimental.constrained.sitofp.nxv1f16.nxv1i64(<vscale x 1 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x half> %evec
@@ -1472,9 +1462,9 @@ define <vscale x 1 x half> @vuitofp_nxv1i64_nxv1f16(<vscale x 1 x i64> %va) stri
 ; CHECK-LABEL: vuitofp_nxv1i64_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x half> @llvm.experimental.constrained.uitofp.nxv1f16.nxv1i64(<vscale x 1 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x half> %evec
@@ -1485,8 +1475,7 @@ define <vscale x 1 x float> @vsitofp_nxv1i64_nxv1f32(<vscale x 1 x i64> %va) str
 ; CHECK-LABEL: vsitofp_nxv1i64_nxv1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x float> @llvm.experimental.constrained.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x float> %evec
@@ -1497,8 +1486,7 @@ define <vscale x 1 x float> @vuitofp_nxv1i64_nxv1f32(<vscale x 1 x i64> %va) str
 ; CHECK-LABEL: vuitofp_nxv1i64_nxv1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 1 x float> @llvm.experimental.constrained.uitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 1 x float> %evec
@@ -1532,9 +1520,9 @@ define <vscale x 2 x half> @vsitofp_nxv2i64_nxv2f16(<vscale x 2 x i64> %va) stri
 ; CHECK-LABEL: vsitofp_nxv2i64_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x half> @llvm.experimental.constrained.sitofp.nxv2f16.nxv2i64(<vscale x 2 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x half> %evec
@@ -1545,9 +1533,9 @@ define <vscale x 2 x half> @vuitofp_nxv2i64_nxv2f16(<vscale x 2 x i64> %va) stri
 ; CHECK-LABEL: vuitofp_nxv2i64_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x half> @llvm.experimental.constrained.uitofp.nxv2f16.nxv2i64(<vscale x 2 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x half> %evec
@@ -1558,8 +1546,7 @@ define <vscale x 2 x float> @vsitofp_nxv2i64_nxv2f32(<vscale x 2 x i64> %va) str
 ; CHECK-LABEL: vsitofp_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x float> @llvm.experimental.constrained.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x float> %evec
@@ -1570,8 +1557,7 @@ define <vscale x 2 x float> @vuitofp_nxv2i64_nxv2f32(<vscale x 2 x i64> %va) str
 ; CHECK-LABEL: vuitofp_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 2 x float> @llvm.experimental.constrained.uitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 2 x float> %evec
@@ -1604,9 +1590,9 @@ define <vscale x 4 x half> @vsitofp_nxv4i64_nxv4f16(<vscale x 4 x i64> %va) stri
 ; CHECK-LABEL: vsitofp_nxv4i64_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x half> @llvm.experimental.constrained.sitofp.nxv4f16.nxv4i64(<vscale x 4 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x half> %evec
@@ -1617,9 +1603,9 @@ define <vscale x 4 x half> @vuitofp_nxv4i64_nxv4f16(<vscale x 4 x i64> %va) stri
 ; CHECK-LABEL: vuitofp_nxv4i64_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x half> @llvm.experimental.constrained.uitofp.nxv4f16.nxv4i64(<vscale x 4 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x half> %evec
@@ -1630,8 +1616,7 @@ define <vscale x 4 x float> @vsitofp_nxv4i64_nxv4f32(<vscale x 4 x i64> %va) str
 ; CHECK-LABEL: vsitofp_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x float> @llvm.experimental.constrained.sitofp.nxv4f32.nxv4i64(<vscale x 4 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x float> %evec
@@ -1642,8 +1627,7 @@ define <vscale x 4 x float> @vuitofp_nxv4i64_nxv4f32(<vscale x 4 x i64> %va) str
 ; CHECK-LABEL: vuitofp_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 4 x float> @llvm.experimental.constrained.uitofp.nxv4f32.nxv4i64(<vscale x 4 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 4 x float> %evec
@@ -1676,9 +1660,9 @@ define <vscale x 8 x half> @vsitofp_nxv8i64_nxv8f16(<vscale x 8 x i64> %va) stri
 ; CHECK-LABEL: vsitofp_nxv8i64_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v16, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v16
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x half> @llvm.experimental.constrained.sitofp.nxv8f16.nxv8i64(<vscale x 8 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x half> %evec
@@ -1689,9 +1673,9 @@ define <vscale x 8 x half> @vuitofp_nxv8i64_nxv8f16(<vscale x 8 x i64> %va) stri
 ; CHECK-LABEL: vuitofp_nxv8i64_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v16, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v16
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x half> @llvm.experimental.constrained.uitofp.nxv8f16.nxv8i64(<vscale x 8 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x half> %evec
@@ -1702,8 +1686,7 @@ define <vscale x 8 x float> @vsitofp_nxv8i64_nxv8f32(<vscale x 8 x i64> %va) str
 ; CHECK-LABEL: vsitofp_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x float> @llvm.experimental.constrained.sitofp.nxv8f32.nxv8i64(<vscale x 8 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x float> %evec
@@ -1714,8 +1697,7 @@ define <vscale x 8 x float> @vuitofp_nxv8i64_nxv8f32(<vscale x 8 x i64> %va) str
 ; CHECK-LABEL: vuitofp_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = call <vscale x 8 x float> @llvm.experimental.constrained.uitofp.nxv8f32.nxv8i64(<vscale x 8 x i64> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <vscale x 8 x float> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll
index 95b1c35d48bb3..7f03cc8150136 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll
@@ -69,9 +69,9 @@ define <vscale x 4 x bfloat> @vsitofp_nxv4i1_nxv4bf16(<vscale x 4 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v10, v8
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i1> %va to <vscale x 4 x bfloat>
   ret <vscale x 4 x bfloat> %evec
@@ -82,9 +82,9 @@ define <vscale x 4 x bfloat> @vuitofp_nxv4i1_nxv4bf16(<vscale x 4 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i1> %va to <vscale x 4 x bfloat>
   ret <vscale x 4 x bfloat> %evec
@@ -95,9 +95,9 @@ define <vscale x 8 x bfloat> @vsitofp_nxv8i1_nxv8bf16(<vscale x 8 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v12, v8
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i1> %va to <vscale x 8 x bfloat>
   ret <vscale x 8 x bfloat> %evec
@@ -108,9 +108,9 @@ define <vscale x 8 x bfloat> @vuitofp_nxv8i1_nxv8bf16(<vscale x 8 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i1> %va to <vscale x 8 x bfloat>
   ret <vscale x 8 x bfloat> %evec
@@ -121,9 +121,9 @@ define <vscale x 16 x bfloat> @vsitofp_nxv16i1_nxv16bf16(<vscale x 16 x i1> %va)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v16, v8
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 16 x i1> %va to <vscale x 16 x bfloat>
   ret <vscale x 16 x bfloat> %evec
@@ -134,9 +134,9 @@ define <vscale x 16 x bfloat> @vuitofp_nxv16i1_nxv16bf16(<vscale x 16 x i1> %va)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 16 x i1> %va to <vscale x 16 x bfloat>
   ret <vscale x 16 x bfloat> %evec
@@ -146,16 +146,16 @@ define <vscale x 32 x bfloat> @vsitofp_nxv32i1_nxv32bf16(<vscale x 32 x i1> %va)
 ; CHECK-LABEL: vsitofp_nxv32i1_nxv32bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vmv.v.i v16, 0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vmerge.vim v8, v12, -1, v0
+; CHECK-NEXT:    vmerge.vim v12, v16, -1, v0
 ; CHECK-NEXT:    srli a0, a0, 2
-; CHECK-NEXT:    vfwcvt.f.x.v v16, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v0, v0, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    vmerge.vim v12, v12, -1, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v12, v16, -1, v0
 ; CHECK-NEXT:    vfwcvt.f.x.v v16, v12
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
 ; CHECK-NEXT:    ret
@@ -167,16 +167,16 @@ define <vscale x 32 x bfloat> @vuitofp_nxv32i1_nxv32bf16(<vscale x 32 x i1> %va)
 ; CHECK-LABEL: vuitofp_nxv32i1_nxv32bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vmv.v.i v16, 0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vmerge.vim v8, v12, 1, v0
+; CHECK-NEXT:    vmerge.vim v12, v16, 1, v0
 ; CHECK-NEXT:    srli a0, a0, 2
-; CHECK-NEXT:    vfwcvt.f.xu.v v16, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v0, v0, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vmerge.vim v12, v16, 1, v0
 ; CHECK-NEXT:    vfwcvt.f.xu.v v16, v12
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
 ; CHECK-NEXT:    ret
@@ -189,8 +189,8 @@ define <vscale x 1 x bfloat> @vsitofp_nxv1i8_nxv1bf16(<vscale x 1 x i8> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vsext.vf2 v9, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v10, v9
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 1 x i8> %va to <vscale x 1 x bfloat>
   ret <vscale x 1 x bfloat> %evec
@@ -204,8 +204,8 @@ define <vscale x 1 x bfloat> @vsitofp_nxv1i7_nxv1bf16(<vscale x 1 x i7> %va) {
 ; CHECK-NEXT:    vsra.vi v8, v8, 1
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vsext.vf2 v9, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v10, v9
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 1 x i7> %va to <vscale x 1 x bfloat>
   ret <vscale x 1 x bfloat> %evec
@@ -219,8 +219,8 @@ define <vscale x 1 x bfloat> @vuitofp_nxv1i7_nxv1bf16(<vscale x 1 x i7> %va) {
 ; CHECK-NEXT:    vand.vx v8, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v9, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v10, v9
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 1 x i7> %va to <vscale x 1 x bfloat>
   ret <vscale x 1 x bfloat> %evec
@@ -231,8 +231,8 @@ define <vscale x 1 x bfloat> @vuitofp_nxv1i8_nxv1bf16(<vscale x 1 x i8> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v9, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v10, v9
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 1 x i8> %va to <vscale x 1 x bfloat>
   ret <vscale x 1 x bfloat> %evec
@@ -243,8 +243,8 @@ define <vscale x 2 x bfloat> @vsitofp_nxv2i8_nxv2bf16(<vscale x 2 x i8> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vsext.vf2 v9, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v10, v9
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 2 x i8> %va to <vscale x 2 x bfloat>
   ret <vscale x 2 x bfloat> %evec
@@ -255,8 +255,8 @@ define <vscale x 2 x bfloat> @vuitofp_nxv2i8_nxv2bf16(<vscale x 2 x i8> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v9, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v10, v9
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 2 x i8> %va to <vscale x 2 x bfloat>
   ret <vscale x 2 x bfloat> %evec
@@ -267,8 +267,8 @@ define <vscale x 4 x bfloat> @vsitofp_nxv4i8_nxv4bf16(<vscale x 4 x i8> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vsext.vf2 v9, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v10, v9
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i8> %va to <vscale x 4 x bfloat>
   ret <vscale x 4 x bfloat> %evec
@@ -279,8 +279,8 @@ define <vscale x 4 x bfloat> @vuitofp_nxv4i8_nxv4bf16(<vscale x 4 x i8> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v9, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v10, v9
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i8> %va to <vscale x 4 x bfloat>
   ret <vscale x 4 x bfloat> %evec
@@ -291,8 +291,8 @@ define <vscale x 8 x bfloat> @vsitofp_nxv8i8_nxv8bf16(<vscale x 8 x i8> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vsext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v12, v10
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i8> %va to <vscale x 8 x bfloat>
   ret <vscale x 8 x bfloat> %evec
@@ -303,8 +303,8 @@ define <vscale x 8 x bfloat> @vuitofp_nxv8i8_nxv8bf16(<vscale x 8 x i8> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v12, v10
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i8> %va to <vscale x 8 x bfloat>
   ret <vscale x 8 x bfloat> %evec
@@ -315,8 +315,8 @@ define <vscale x 16 x bfloat> @vsitofp_nxv16i8_nxv16bf16(<vscale x 16 x i8> %va)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v16, v12
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 16 x i8> %va to <vscale x 16 x bfloat>
   ret <vscale x 16 x bfloat> %evec
@@ -327,8 +327,8 @@ define <vscale x 16 x bfloat> @vuitofp_nxv16i8_nxv16bf16(<vscale x 16 x i8> %va)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v16, v12
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 16 x i8> %va to <vscale x 16 x bfloat>
   ret <vscale x 16 x bfloat> %evec
@@ -339,10 +339,10 @@ define <vscale x 32 x bfloat> @vsitofp_nxv32i8_nxv32bf16(<vscale x 32 x i8> %va)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vsext.vf2 v24, v10
-; CHECK-NEXT:    vfwcvt.f.x.v v16, v12
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    vfwcvt.f.x.v v16, v24
+; CHECK-NEXT:    vsext.vf2 v20, v10
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v16, v20
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 32 x i8> %va to <vscale x 32 x bfloat>
@@ -354,10 +354,10 @@ define <vscale x 32 x bfloat> @vuitofp_nxv32i8_nxv32bf16(<vscale x 32 x i8> %va)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vzext.vf2 v24, v10
-; CHECK-NEXT:    vfwcvt.f.xu.v v16, v12
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    vfwcvt.f.xu.v v16, v24
+; CHECK-NEXT:    vzext.vf2 v20, v10
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v16, v20
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 32 x i8> %va to <vscale x 32 x bfloat>
@@ -504,9 +504,9 @@ define <vscale x 1 x bfloat> @vsitofp_nxv1i32_nxv1bf16(<vscale x 1 x i32> %va) {
 ; CHECK-LABEL: vsitofp_nxv1i32_nxv1bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfcvt.f.x.v v9, v8
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 1 x i32> %va to <vscale x 1 x bfloat>
   ret <vscale x 1 x bfloat> %evec
@@ -516,9 +516,9 @@ define <vscale x 1 x bfloat> @vuitofp_nxv1i32_nxv1bf16(<vscale x 1 x i32> %va) {
 ; CHECK-LABEL: vuitofp_nxv1i32_nxv1bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfcvt.f.xu.v v9, v8
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 1 x i32> %va to <vscale x 1 x bfloat>
   ret <vscale x 1 x bfloat> %evec
@@ -528,9 +528,9 @@ define <vscale x 2 x bfloat> @vsitofp_nxv2i32_nxv2bf16(<vscale x 2 x i32> %va) {
 ; CHECK-LABEL: vsitofp_nxv2i32_nxv2bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfcvt.f.x.v v9, v8
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 2 x i32> %va to <vscale x 2 x bfloat>
   ret <vscale x 2 x bfloat> %evec
@@ -540,9 +540,9 @@ define <vscale x 2 x bfloat> @vuitofp_nxv2i32_nxv2bf16(<vscale x 2 x i32> %va) {
 ; CHECK-LABEL: vuitofp_nxv2i32_nxv2bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfcvt.f.xu.v v9, v8
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 2 x i32> %va to <vscale x 2 x bfloat>
   ret <vscale x 2 x bfloat> %evec
@@ -552,9 +552,9 @@ define <vscale x 4 x bfloat> @vsitofp_nxv4i32_nxv4bf16(<vscale x 4 x i32> %va) {
 ; CHECK-LABEL: vsitofp_nxv4i32_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.f.x.v v10, v8
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i32> %va to <vscale x 4 x bfloat>
   ret <vscale x 4 x bfloat> %evec
@@ -564,9 +564,9 @@ define <vscale x 4 x bfloat> @vuitofp_nxv4i32_nxv4bf16(<vscale x 4 x i32> %va) {
 ; CHECK-LABEL: vuitofp_nxv4i32_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfcvt.f.xu.v v10, v8
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i32> %va to <vscale x 4 x bfloat>
   ret <vscale x 4 x bfloat> %evec
@@ -576,9 +576,9 @@ define <vscale x 8 x bfloat> @vsitofp_nxv8i32_nxv8bf16(<vscale x 8 x i32> %va) {
 ; CHECK-LABEL: vsitofp_nxv8i32_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.f.x.v v12, v8
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i32> %va to <vscale x 8 x bfloat>
   ret <vscale x 8 x bfloat> %evec
@@ -588,9 +588,9 @@ define <vscale x 8 x bfloat> @vuitofp_nxv8i32_nxv8bf16(<vscale x 8 x i32> %va) {
 ; CHECK-LABEL: vuitofp_nxv8i32_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfcvt.f.xu.v v12, v8
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i32> %va to <vscale x 8 x bfloat>
   ret <vscale x 8 x bfloat> %evec
@@ -600,9 +600,9 @@ define <vscale x 16 x bfloat> @vsitofp_nxv16i32_nxv16bf16(<vscale x 16 x i32> %v
 ; CHECK-LABEL: vsitofp_nxv16i32_nxv16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.f.x.v v16, v8
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 16 x i32> %va to <vscale x 16 x bfloat>
   ret <vscale x 16 x bfloat> %evec
@@ -612,9 +612,9 @@ define <vscale x 16 x bfloat> @vuitofp_nxv16i32_nxv16bf16(<vscale x 16 x i32> %v
 ; CHECK-LABEL: vuitofp_nxv16i32_nxv16bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfcvt.f.xu.v v16, v8
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 16 x i32> %va to <vscale x 16 x bfloat>
   ret <vscale x 16 x bfloat> %evec
@@ -624,9 +624,9 @@ define <vscale x 1 x bfloat> @vsitofp_nxv1i64_nxv1bf16(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vsitofp_nxv1i64_nxv1bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 1 x i64> %va to <vscale x 1 x bfloat>
   ret <vscale x 1 x bfloat> %evec
@@ -636,9 +636,9 @@ define <vscale x 1 x bfloat> @vuitofp_nxv1i64_nxv1bf16(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vuitofp_nxv1i64_nxv1bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 1 x i64> %va to <vscale x 1 x bfloat>
   ret <vscale x 1 x bfloat> %evec
@@ -648,9 +648,9 @@ define <vscale x 2 x bfloat> @vsitofp_nxv2i64_nxv2bf16(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vsitofp_nxv2i64_nxv2bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 2 x i64> %va to <vscale x 2 x bfloat>
   ret <vscale x 2 x bfloat> %evec
@@ -660,9 +660,9 @@ define <vscale x 2 x bfloat> @vuitofp_nxv2i64_nxv2bf16(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vuitofp_nxv2i64_nxv2bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 2 x i64> %va to <vscale x 2 x bfloat>
   ret <vscale x 2 x bfloat> %evec
@@ -672,9 +672,9 @@ define <vscale x 4 x bfloat> @vsitofp_nxv4i64_nxv4bf16(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vsitofp_nxv4i64_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i64> %va to <vscale x 4 x bfloat>
   ret <vscale x 4 x bfloat> %evec
@@ -684,9 +684,9 @@ define <vscale x 4 x bfloat> @vuitofp_nxv4i64_nxv4bf16(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vuitofp_nxv4i64_nxv4bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i64> %va to <vscale x 4 x bfloat>
   ret <vscale x 4 x bfloat> %evec
@@ -696,9 +696,9 @@ define <vscale x 8 x bfloat> @vsitofp_nxv8i64_nxv8bf16(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vsitofp_nxv8i64_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v16, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i64> %va to <vscale x 8 x bfloat>
   ret <vscale x 8 x bfloat> %evec
@@ -708,9 +708,9 @@ define <vscale x 8 x bfloat> @vuitofp_nxv8i64_nxv8bf16(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vuitofp_nxv8i64_nxv8bf16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v16, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i64> %va to <vscale x 8 x bfloat>
   ret <vscale x 8 x bfloat> %evec
@@ -877,8 +877,8 @@ define <vscale x 2 x double> @vsitofp_nxv2i1_nxv2f64(<vscale x 2 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 2 x i1> %va to <vscale x 2 x double>
   ret <vscale x 2 x double> %evec
@@ -889,8 +889,8 @@ define <vscale x 2 x double> @vuitofp_nxv2i1_nxv2f64(<vscale x 2 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 2 x i1> %va to <vscale x 2 x double>
   ret <vscale x 2 x double> %evec
@@ -909,9 +909,9 @@ define <vscale x 4 x half> @vsitofp_nxv4i1_nxv4f16(<vscale x 4 x i1> %va) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.i v8, 0
-; ZVFHMIN-NEXT:    vmerge.vim v8, v8, -1, v0
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vmerge.vim v9, v8, -1, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 4 x i1> %va to <vscale x 4 x half>
   ret <vscale x 4 x half> %evec
@@ -930,9 +930,9 @@ define <vscale x 4 x half> @vuitofp_nxv4i1_nxv4f16(<vscale x 4 x i1> %va) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.i v8, 0
-; ZVFHMIN-NEXT:    vmerge.vim v8, v8, 1, v0
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vmerge.vim v9, v8, 1, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 4 x i1> %va to <vscale x 4 x half>
   ret <vscale x 4 x half> %evec
@@ -943,8 +943,8 @@ define <vscale x 4 x float> @vsitofp_nxv4i1_nxv4f32(<vscale x 4 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i1> %va to <vscale x 4 x float>
   ret <vscale x 4 x float> %evec
@@ -955,8 +955,8 @@ define <vscale x 4 x float> @vuitofp_nxv4i1_nxv4f32(<vscale x 4 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmerge.vim v9, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i1> %va to <vscale x 4 x float>
   ret <vscale x 4 x float> %evec
@@ -967,8 +967,8 @@ define <vscale x 4 x double> @vsitofp_nxv4i1_nxv4f64(<vscale x 4 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i1> %va to <vscale x 4 x double>
   ret <vscale x 4 x double> %evec
@@ -979,8 +979,8 @@ define <vscale x 4 x double> @vuitofp_nxv4i1_nxv4f64(<vscale x 4 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i1> %va to <vscale x 4 x double>
   ret <vscale x 4 x double> %evec
@@ -991,17 +991,17 @@ define <vscale x 8 x half> @vsitofp_nxv8i1_nxv8f16(<vscale x 8 x i1> %va) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; ZVFH-NEXT:    vmv.v.i v8, 0
-; ZVFH-NEXT:    vmerge.vim v10, v8, -1, v0
-; ZVFH-NEXT:    vfwcvt.f.x.v v8, v10
+; ZVFH-NEXT:    vmerge.vim v9, v8, -1, v0
+; ZVFH-NEXT:    vfwcvt.f.x.v v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv8i1_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.i v8, 0
-; ZVFHMIN-NEXT:    vmerge.vim v8, v8, -1, v0
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v12, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vmerge.vim v10, v8, -1, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 8 x i1> %va to <vscale x 8 x half>
   ret <vscale x 8 x half> %evec
@@ -1012,17 +1012,17 @@ define <vscale x 8 x half> @vuitofp_nxv8i1_nxv8f16(<vscale x 8 x i1> %va) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; ZVFH-NEXT:    vmv.v.i v8, 0
-; ZVFH-NEXT:    vmerge.vim v10, v8, 1, v0
-; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v10
+; ZVFH-NEXT:    vmerge.vim v9, v8, 1, v0
+; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv8i1_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.i v8, 0
-; ZVFHMIN-NEXT:    vmerge.vim v8, v8, 1, v0
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v12, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vmerge.vim v10, v8, 1, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 8 x i1> %va to <vscale x 8 x half>
   ret <vscale x 8 x half> %evec
@@ -1033,8 +1033,8 @@ define <vscale x 8 x float> @vsitofp_nxv8i1_nxv8f32(<vscale x 8 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i1> %va to <vscale x 8 x float>
   ret <vscale x 8 x float> %evec
@@ -1045,8 +1045,8 @@ define <vscale x 8 x float> @vuitofp_nxv8i1_nxv8f32(<vscale x 8 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i1> %va to <vscale x 8 x float>
   ret <vscale x 8 x float> %evec
@@ -1057,8 +1057,8 @@ define <vscale x 8 x double> @vsitofp_nxv8i1_nxv8f64(<vscale x 8 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v16, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i1> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
@@ -1069,8 +1069,8 @@ define <vscale x 8 x double> @vuitofp_nxv8i1_nxv8f64(<vscale x 8 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v16, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i1> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
@@ -1081,17 +1081,17 @@ define <vscale x 16 x half> @vsitofp_nxv16i1_nxv16f16(<vscale x 16 x i1> %va) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; ZVFH-NEXT:    vmv.v.i v8, 0
-; ZVFH-NEXT:    vmerge.vim v12, v8, -1, v0
-; ZVFH-NEXT:    vfwcvt.f.x.v v8, v12
+; ZVFH-NEXT:    vmerge.vim v10, v8, -1, v0
+; ZVFH-NEXT:    vfwcvt.f.x.v v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv16i1_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.i v8, 0
-; ZVFHMIN-NEXT:    vmerge.vim v8, v8, -1, v0
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v16, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vmerge.vim v12, v8, -1, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 16 x i1> %va to <vscale x 16 x half>
   ret <vscale x 16 x half> %evec
@@ -1102,17 +1102,17 @@ define <vscale x 16 x half> @vuitofp_nxv16i1_nxv16f16(<vscale x 16 x i1> %va) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; ZVFH-NEXT:    vmv.v.i v8, 0
-; ZVFH-NEXT:    vmerge.vim v12, v8, 1, v0
-; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v12
+; ZVFH-NEXT:    vmerge.vim v10, v8, 1, v0
+; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv16i1_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.i v8, 0
-; ZVFHMIN-NEXT:    vmerge.vim v8, v8, 1, v0
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v16, v8
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vmerge.vim v12, v8, 1, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 16 x i1> %va to <vscale x 16 x half>
   ret <vscale x 16 x half> %evec
@@ -1123,8 +1123,8 @@ define <vscale x 16 x float> @vsitofp_nxv16i1_nxv16f32(<vscale x 16 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v16, v8, -1, v0
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 16 x i1> %va to <vscale x 16 x float>
   ret <vscale x 16 x float> %evec
@@ -1135,8 +1135,8 @@ define <vscale x 16 x float> @vuitofp_nxv16i1_nxv16f32(<vscale x 16 x i1> %va) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v16, v8, 1, v0
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 16 x i1> %va to <vscale x 16 x float>
   ret <vscale x 16 x float> %evec
@@ -1147,23 +1147,23 @@ define <vscale x 32 x half> @vsitofp_nxv32i1_nxv32f16(<vscale x 32 x i1> %va) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; ZVFH-NEXT:    vmv.v.i v8, 0
-; ZVFH-NEXT:    vmerge.vim v16, v8, -1, v0
-; ZVFH-NEXT:    vfwcvt.f.x.v v8, v16
+; ZVFH-NEXT:    vmerge.vim v12, v8, -1, v0
+; ZVFH-NEXT:    vfwcvt.f.x.v v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv32i1_nxv32f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.i v12, 0
+; ZVFHMIN-NEXT:    vmv.v.i v16, 0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    vmerge.vim v8, v12, -1, v0
+; ZVFHMIN-NEXT:    vmerge.vim v12, v16, -1, v0
 ; ZVFHMIN-NEXT:    srli a0, a0, 2
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vmerge.vim v12, v12, -1, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vmerge.vim v12, v16, -1, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.x.v v16, v12
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    ret
@@ -1176,23 +1176,23 @@ define <vscale x 32 x half> @vuitofp_nxv32i1_nxv32f16(<vscale x 32 x i1> %va) {
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; ZVFH-NEXT:    vmv.v.i v8, 0
-; ZVFH-NEXT:    vmerge.vim v16, v8, 1, v0
-; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v16
+; ZVFH-NEXT:    vmerge.vim v12, v8, 1, v0
+; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv32i1_nxv32f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.i v12, 0
+; ZVFHMIN-NEXT:    vmv.v.i v16, 0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    vmerge.vim v8, v12, 1, v0
+; ZVFHMIN-NEXT:    vmerge.vim v12, v16, 1, v0
 ; ZVFHMIN-NEXT:    srli a0, a0, 2
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v16, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v12
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a0
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vmerge.vim v12, v12, 1, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vmerge.vim v12, v16, 1, v0
 ; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v16, v12
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    ret
@@ -1212,8 +1212,8 @@ define <vscale x 1 x half> @vsitofp_nxv1i8_nxv1f16(<vscale x 1 x i8> %va) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v9
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 1 x i8> %va to <vscale x 1 x half>
   ret <vscale x 1 x half> %evec
@@ -1235,8 +1235,8 @@ define <vscale x 1 x half> @vsitofp_nxv1i7_nxv1f16(<vscale x 1 x i7> %va) {
 ; ZVFHMIN-NEXT:    vsra.vi v8, v8, 1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v9
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 1 x i7> %va to <vscale x 1 x half>
   ret <vscale x 1 x half> %evec
@@ -1258,8 +1258,8 @@ define <vscale x 1 x half> @vuitofp_nxv1i7_nxv1f16(<vscale x 1 x i7> %va) {
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v9
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 1 x i7> %va to <vscale x 1 x half>
   ret <vscale x 1 x half> %evec
@@ -1277,8 +1277,8 @@ define <vscale x 1 x half> @vuitofp_nxv1i8_nxv1f16(<vscale x 1 x i8> %va) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v9
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 1 x i8> %va to <vscale x 1 x half>
   ret <vscale x 1 x half> %evec
@@ -1340,8 +1340,8 @@ define <vscale x 2 x half> @vsitofp_nxv2i8_nxv2f16(<vscale x 2 x i8> %va) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v9
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 2 x i8> %va to <vscale x 2 x half>
   ret <vscale x 2 x half> %evec
@@ -1359,8 +1359,8 @@ define <vscale x 2 x half> @vuitofp_nxv2i8_nxv2f16(<vscale x 2 x i8> %va) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v9
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 2 x i8> %va to <vscale x 2 x half>
   ret <vscale x 2 x half> %evec
@@ -1392,8 +1392,8 @@ define <vscale x 2 x double> @vsitofp_nxv2i8_nxv2f64(<vscale x 2 x i8> %va) {
 ; CHECK-LABEL: vsitofp_nxv2i8_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf4 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 2 x i8> %va to <vscale x 2 x double>
   ret <vscale x 2 x double> %evec
@@ -1403,8 +1403,8 @@ define <vscale x 2 x double> @vuitofp_nxv2i8_nxv2f64(<vscale x 2 x i8> %va) {
 ; CHECK-LABEL: vuitofp_nxv2i8_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf4 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 2 x i8> %va to <vscale x 2 x double>
   ret <vscale x 2 x double> %evec
@@ -1422,8 +1422,8 @@ define <vscale x 4 x half> @vsitofp_nxv4i8_nxv4f16(<vscale x 4 x i8> %va) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v9
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 4 x i8> %va to <vscale x 4 x half>
   ret <vscale x 4 x half> %evec
@@ -1441,8 +1441,8 @@ define <vscale x 4 x half> @vuitofp_nxv4i8_nxv4f16(<vscale x 4 x i8> %va) {
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v9
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 4 x i8> %va to <vscale x 4 x half>
   ret <vscale x 4 x half> %evec
@@ -1452,8 +1452,8 @@ define <vscale x 4 x float> @vsitofp_nxv4i8_nxv4f32(<vscale x 4 x i8> %va) {
 ; CHECK-LABEL: vsitofp_nxv4i8_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i8> %va to <vscale x 4 x float>
   ret <vscale x 4 x float> %evec
@@ -1463,8 +1463,8 @@ define <vscale x 4 x float> @vuitofp_nxv4i8_nxv4f32(<vscale x 4 x i8> %va) {
 ; CHECK-LABEL: vuitofp_nxv4i8_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i8> %va to <vscale x 4 x float>
   ret <vscale x 4 x float> %evec
@@ -1474,8 +1474,8 @@ define <vscale x 4 x double> @vsitofp_nxv4i8_nxv4f64(<vscale x 4 x i8> %va) {
 ; CHECK-LABEL: vsitofp_nxv4i8_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i8> %va to <vscale x 4 x double>
   ret <vscale x 4 x double> %evec
@@ -1485,8 +1485,8 @@ define <vscale x 4 x double> @vuitofp_nxv4i8_nxv4f64(<vscale x 4 x i8> %va) {
 ; CHECK-LABEL: vuitofp_nxv4i8_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i8> %va to <vscale x 4 x double>
   ret <vscale x 4 x double> %evec
@@ -1496,16 +1496,16 @@ define <vscale x 8 x half> @vsitofp_nxv8i8_nxv8f16(<vscale x 8 x i8> %va) {
 ; ZVFH-LABEL: vsitofp_nxv8i8_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v8
-; ZVFH-NEXT:    vfwcvt.f.x.v v8, v10
+; ZVFH-NEXT:    vmv1r.v v9, v8
+; ZVFH-NEXT:    vfwcvt.f.x.v v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv8i8_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v12, v10
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 8 x i8> %va to <vscale x 8 x half>
   ret <vscale x 8 x half> %evec
@@ -1515,16 +1515,16 @@ define <vscale x 8 x half> @vuitofp_nxv8i8_nxv8f16(<vscale x 8 x i8> %va) {
 ; ZVFH-LABEL: vuitofp_nxv8i8_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; ZVFH-NEXT:    vmv1r.v v10, v8
-; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v10
+; ZVFH-NEXT:    vmv1r.v v9, v8
+; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v9
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv8i8_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v12, v10
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 8 x i8> %va to <vscale x 8 x half>
   ret <vscale x 8 x half> %evec
@@ -1534,8 +1534,8 @@ define <vscale x 8 x float> @vsitofp_nxv8i8_nxv8f32(<vscale x 8 x i8> %va) {
 ; CHECK-LABEL: vsitofp_nxv8i8_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i8> %va to <vscale x 8 x float>
   ret <vscale x 8 x float> %evec
@@ -1545,8 +1545,8 @@ define <vscale x 8 x float> @vuitofp_nxv8i8_nxv8f32(<vscale x 8 x i8> %va) {
 ; CHECK-LABEL: vuitofp_nxv8i8_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i8> %va to <vscale x 8 x float>
   ret <vscale x 8 x float> %evec
@@ -1556,8 +1556,8 @@ define <vscale x 8 x double> @vsitofp_nxv8i8_nxv8f64(<vscale x 8 x i8> %va) {
 ; CHECK-LABEL: vsitofp_nxv8i8_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i8> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
@@ -1567,8 +1567,8 @@ define <vscale x 8 x double> @vuitofp_nxv8i8_nxv8f64(<vscale x 8 x i8> %va) {
 ; CHECK-LABEL: vuitofp_nxv8i8_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vzext.vf4 v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i8> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
@@ -1578,16 +1578,16 @@ define <vscale x 16 x half> @vsitofp_nxv16i8_nxv16f16(<vscale x 16 x i8> %va) {
 ; ZVFH-LABEL: vsitofp_nxv16i8_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; ZVFH-NEXT:    vmv2r.v v12, v8
-; ZVFH-NEXT:    vfwcvt.f.x.v v8, v12
+; ZVFH-NEXT:    vmv2r.v v10, v8
+; ZVFH-NEXT:    vfwcvt.f.x.v v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv16i8_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v16, v12
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 16 x i8> %va to <vscale x 16 x half>
   ret <vscale x 16 x half> %evec
@@ -1597,16 +1597,16 @@ define <vscale x 16 x half> @vuitofp_nxv16i8_nxv16f16(<vscale x 16 x i8> %va) {
 ; ZVFH-LABEL: vuitofp_nxv16i8_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; ZVFH-NEXT:    vmv2r.v v12, v8
-; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v12
+; ZVFH-NEXT:    vmv2r.v v10, v8
+; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v10
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv16i8_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v16, v12
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 16 x i8> %va to <vscale x 16 x half>
   ret <vscale x 16 x half> %evec
@@ -1616,8 +1616,8 @@ define <vscale x 16 x float> @vsitofp_nxv16i8_nxv16f32(<vscale x 16 x i8> %va) {
 ; CHECK-LABEL: vsitofp_nxv16i8_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 16 x i8> %va to <vscale x 16 x float>
   ret <vscale x 16 x float> %evec
@@ -1627,8 +1627,8 @@ define <vscale x 16 x float> @vuitofp_nxv16i8_nxv16f32(<vscale x 16 x i8> %va) {
 ; CHECK-LABEL: vuitofp_nxv16i8_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 16 x i8> %va to <vscale x 16 x float>
   ret <vscale x 16 x float> %evec
@@ -1638,18 +1638,18 @@ define <vscale x 32 x half> @vsitofp_nxv32i8_nxv32f16(<vscale x 32 x i8> %va) {
 ; ZVFH-LABEL: vsitofp_nxv32i8_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZVFH-NEXT:    vmv4r.v v16, v8
-; ZVFH-NEXT:    vfwcvt.f.x.v v8, v16
+; ZVFH-NEXT:    vmv4r.v v12, v8
+; ZVFH-NEXT:    vfwcvt.f.x.v v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv32i8_nxv32f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v12, v8
-; ZVFHMIN-NEXT:    vsext.vf2 v24, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v16, v12
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v16, v24
+; ZVFHMIN-NEXT:    vsext.vf2 v20, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v16, v20
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 32 x i8> %va to <vscale x 32 x half>
@@ -1660,18 +1660,18 @@ define <vscale x 32 x half> @vuitofp_nxv32i8_nxv32f16(<vscale x 32 x i8> %va) {
 ; ZVFH-LABEL: vuitofp_nxv32i8_nxv32f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZVFH-NEXT:    vmv4r.v v16, v8
-; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v16
+; ZVFH-NEXT:    vmv4r.v v12, v8
+; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v12
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv32i8_nxv32f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v12, v8
-; ZVFHMIN-NEXT:    vzext.vf2 v24, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v16, v12
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v16, v24
+; ZVFHMIN-NEXT:    vzext.vf2 v20, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v16, v20
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 32 x i8> %va to <vscale x 32 x half>
@@ -1816,8 +1816,8 @@ define <vscale x 2 x double> @vsitofp_nxv2i16_nxv2f64(<vscale x 2 x i16> %va) {
 ; CHECK-LABEL: vsitofp_nxv2i16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 2 x i16> %va to <vscale x 2 x double>
   ret <vscale x 2 x double> %evec
@@ -1827,8 +1827,8 @@ define <vscale x 2 x double> @vuitofp_nxv2i16_nxv2f64(<vscale x 2 x i16> %va) {
 ; CHECK-LABEL: vuitofp_nxv2i16_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 2 x i16> %va to <vscale x 2 x double>
   ret <vscale x 2 x double> %evec
@@ -1872,8 +1872,8 @@ define <vscale x 4 x float> @vsitofp_nxv4i16_nxv4f32(<vscale x 4 x i16> %va) {
 ; CHECK-LABEL: vsitofp_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i16> %va to <vscale x 4 x float>
   ret <vscale x 4 x float> %evec
@@ -1883,8 +1883,8 @@ define <vscale x 4 x float> @vuitofp_nxv4i16_nxv4f32(<vscale x 4 x i16> %va) {
 ; CHECK-LABEL: vuitofp_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i16> %va to <vscale x 4 x float>
   ret <vscale x 4 x float> %evec
@@ -1894,8 +1894,8 @@ define <vscale x 4 x double> @vsitofp_nxv4i16_nxv4f64(<vscale x 4 x i16> %va) {
 ; CHECK-LABEL: vsitofp_nxv4i16_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i16> %va to <vscale x 4 x double>
   ret <vscale x 4 x double> %evec
@@ -1905,8 +1905,8 @@ define <vscale x 4 x double> @vuitofp_nxv4i16_nxv4f64(<vscale x 4 x i16> %va) {
 ; CHECK-LABEL: vuitofp_nxv4i16_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i16> %va to <vscale x 4 x double>
   ret <vscale x 4 x double> %evec
@@ -1950,8 +1950,8 @@ define <vscale x 8 x float> @vsitofp_nxv8i16_nxv8f32(<vscale x 8 x i16> %va) {
 ; CHECK-LABEL: vsitofp_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i16> %va to <vscale x 8 x float>
   ret <vscale x 8 x float> %evec
@@ -1961,8 +1961,8 @@ define <vscale x 8 x float> @vuitofp_nxv8i16_nxv8f32(<vscale x 8 x i16> %va) {
 ; CHECK-LABEL: vuitofp_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i16> %va to <vscale x 8 x float>
   ret <vscale x 8 x float> %evec
@@ -1972,8 +1972,8 @@ define <vscale x 8 x double> @vsitofp_nxv8i16_nxv8f64(<vscale x 8 x i16> %va) {
 ; CHECK-LABEL: vsitofp_nxv8i16_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i16> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
@@ -1983,8 +1983,8 @@ define <vscale x 8 x double> @vuitofp_nxv8i16_nxv8f64(<vscale x 8 x i16> %va) {
 ; CHECK-LABEL: vuitofp_nxv8i16_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i16> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
@@ -2028,8 +2028,8 @@ define <vscale x 16 x float> @vsitofp_nxv16i16_nxv16f32(<vscale x 16 x i16> %va)
 ; CHECK-LABEL: vsitofp_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 16 x i16> %va to <vscale x 16 x float>
   ret <vscale x 16 x float> %evec
@@ -2039,8 +2039,8 @@ define <vscale x 16 x float> @vuitofp_nxv16i16_nxv16f32(<vscale x 16 x i16> %va)
 ; CHECK-LABEL: vuitofp_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 16 x i16> %va to <vscale x 16 x float>
   ret <vscale x 16 x float> %evec
@@ -2088,16 +2088,15 @@ define <vscale x 1 x half> @vsitofp_nxv1i32_nxv1f16(<vscale x 1 x i32> %va) {
 ; ZVFH-LABEL: vsitofp_nxv1i32_nxv1f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv1i32_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v9, v8
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 1 x i32> %va to <vscale x 1 x half>
   ret <vscale x 1 x half> %evec
@@ -2107,16 +2106,15 @@ define <vscale x 1 x half> @vuitofp_nxv1i32_nxv1f16(<vscale x 1 x i32> %va) {
 ; ZVFH-LABEL: vuitofp_nxv1i32_nxv1f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv1i32_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.xu.v v9, v8
+; ZVFHMIN-NEXT:    vfcvt.f.xu.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 1 x i32> %va to <vscale x 1 x half>
   ret <vscale x 1 x half> %evec
@@ -2168,16 +2166,15 @@ define <vscale x 2 x half> @vsitofp_nxv2i32_nxv2f16(<vscale x 2 x i32> %va) {
 ; ZVFH-LABEL: vsitofp_nxv2i32_nxv2f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv2i32_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v9, v8
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 2 x i32> %va to <vscale x 2 x half>
   ret <vscale x 2 x half> %evec
@@ -2187,16 +2184,15 @@ define <vscale x 2 x half> @vuitofp_nxv2i32_nxv2f16(<vscale x 2 x i32> %va) {
 ; ZVFH-LABEL: vuitofp_nxv2i32_nxv2f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv2i32_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.xu.v v9, v8
+; ZVFHMIN-NEXT:    vfcvt.f.xu.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 2 x i32> %va to <vscale x 2 x half>
   ret <vscale x 2 x half> %evec
@@ -2226,8 +2222,8 @@ define <vscale x 2 x double> @vsitofp_nxv2i32_nxv2f64(<vscale x 2 x i32> %va) {
 ; CHECK-LABEL: vsitofp_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 2 x i32> %va to <vscale x 2 x double>
   ret <vscale x 2 x double> %evec
@@ -2237,8 +2233,8 @@ define <vscale x 2 x double> @vuitofp_nxv2i32_nxv2f64(<vscale x 2 x i32> %va) {
 ; CHECK-LABEL: vuitofp_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 2 x i32> %va to <vscale x 2 x double>
   ret <vscale x 2 x double> %evec
@@ -2248,16 +2244,15 @@ define <vscale x 4 x half> @vsitofp_nxv4i32_nxv4f16(<vscale x 4 x i32> %va) {
 ; ZVFH-LABEL: vsitofp_nxv4i32_nxv4f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v10, v8
-; ZVFH-NEXT:    vmv.v.v v8, v10
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv4i32_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v10, v8
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 4 x i32> %va to <vscale x 4 x half>
   ret <vscale x 4 x half> %evec
@@ -2267,16 +2262,15 @@ define <vscale x 4 x half> @vuitofp_nxv4i32_nxv4f16(<vscale x 4 x i32> %va) {
 ; ZVFH-LABEL: vuitofp_nxv4i32_nxv4f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v10, v8
-; ZVFH-NEXT:    vmv.v.v v8, v10
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv4i32_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.xu.v v10, v8
+; ZVFHMIN-NEXT:    vfcvt.f.xu.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 4 x i32> %va to <vscale x 4 x half>
   ret <vscale x 4 x half> %evec
@@ -2306,8 +2300,8 @@ define <vscale x 4 x double> @vsitofp_nxv4i32_nxv4f64(<vscale x 4 x i32> %va) {
 ; CHECK-LABEL: vsitofp_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i32> %va to <vscale x 4 x double>
   ret <vscale x 4 x double> %evec
@@ -2317,8 +2311,8 @@ define <vscale x 4 x double> @vuitofp_nxv4i32_nxv4f64(<vscale x 4 x i32> %va) {
 ; CHECK-LABEL: vuitofp_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i32> %va to <vscale x 4 x double>
   ret <vscale x 4 x double> %evec
@@ -2328,16 +2322,15 @@ define <vscale x 8 x half> @vsitofp_nxv8i32_nxv8f16(<vscale x 8 x i32> %va) {
 ; ZVFH-LABEL: vsitofp_nxv8i32_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v12, v8
-; ZVFH-NEXT:    vmv.v.v v8, v12
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv8i32_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v8
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 8 x i32> %va to <vscale x 8 x half>
   ret <vscale x 8 x half> %evec
@@ -2347,16 +2340,15 @@ define <vscale x 8 x half> @vuitofp_nxv8i32_nxv8f16(<vscale x 8 x i32> %va) {
 ; ZVFH-LABEL: vuitofp_nxv8i32_nxv8f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v12, v8
-; ZVFH-NEXT:    vmv.v.v v8, v12
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv8i32_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.xu.v v12, v8
+; ZVFHMIN-NEXT:    vfcvt.f.xu.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 8 x i32> %va to <vscale x 8 x half>
   ret <vscale x 8 x half> %evec
@@ -2386,8 +2378,8 @@ define <vscale x 8 x double> @vsitofp_nxv8i32_nxv8f64(<vscale x 8 x i32> %va) {
 ; CHECK-LABEL: vsitofp_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i32> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
@@ -2397,8 +2389,8 @@ define <vscale x 8 x double> @vuitofp_nxv8i32_nxv8f64(<vscale x 8 x i32> %va) {
 ; CHECK-LABEL: vuitofp_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i32> %va to <vscale x 8 x double>
   ret <vscale x 8 x double> %evec
@@ -2408,16 +2400,15 @@ define <vscale x 16 x half> @vsitofp_nxv16i32_nxv16f16(<vscale x 16 x i32> %va)
 ; ZVFH-LABEL: vsitofp_nxv16i32_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v16, v8
-; ZVFH-NEXT:    vmv.v.v v8, v16
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv16i32_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v8
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = sitofp <vscale x 16 x i32> %va to <vscale x 16 x half>
   ret <vscale x 16 x half> %evec
@@ -2427,16 +2418,15 @@ define <vscale x 16 x half> @vuitofp_nxv16i32_nxv16f16(<vscale x 16 x i32> %va)
 ; ZVFH-LABEL: vuitofp_nxv16i32_nxv16f16:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v16, v8
-; ZVFH-NEXT:    vmv.v.v v8, v16
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv16i32_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.xu.v v16, v8
+; ZVFHMIN-NEXT:    vfcvt.f.xu.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %evec = uitofp <vscale x 16 x i32> %va to <vscale x 16 x half>
   ret <vscale x 16 x half> %evec
@@ -2466,9 +2456,9 @@ define <vscale x 1 x half> @vsitofp_nxv1i64_nxv1f16(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vsitofp_nxv1i64_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 1 x i64> %va to <vscale x 1 x half>
   ret <vscale x 1 x half> %evec
@@ -2478,9 +2468,9 @@ define <vscale x 1 x half> @vuitofp_nxv1i64_nxv1f16(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vuitofp_nxv1i64_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 1 x i64> %va to <vscale x 1 x half>
   ret <vscale x 1 x half> %evec
@@ -2490,8 +2480,7 @@ define <vscale x 1 x float> @vsitofp_nxv1i64_nxv1f32(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vsitofp_nxv1i64_nxv1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 1 x i64> %va to <vscale x 1 x float>
   ret <vscale x 1 x float> %evec
@@ -2501,8 +2490,7 @@ define <vscale x 1 x float> @vuitofp_nxv1i64_nxv1f32(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vuitofp_nxv1i64_nxv1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 1 x i64> %va to <vscale x 1 x float>
   ret <vscale x 1 x float> %evec
@@ -2532,9 +2520,9 @@ define <vscale x 2 x half> @vsitofp_nxv2i64_nxv2f16(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vsitofp_nxv2i64_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 2 x i64> %va to <vscale x 2 x half>
   ret <vscale x 2 x half> %evec
@@ -2544,9 +2532,9 @@ define <vscale x 2 x half> @vuitofp_nxv2i64_nxv2f16(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vuitofp_nxv2i64_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 2 x i64> %va to <vscale x 2 x half>
   ret <vscale x 2 x half> %evec
@@ -2556,8 +2544,7 @@ define <vscale x 2 x float> @vsitofp_nxv2i64_nxv2f32(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vsitofp_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 2 x i64> %va to <vscale x 2 x float>
   ret <vscale x 2 x float> %evec
@@ -2567,8 +2554,7 @@ define <vscale x 2 x float> @vuitofp_nxv2i64_nxv2f32(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vuitofp_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 2 x i64> %va to <vscale x 2 x float>
   ret <vscale x 2 x float> %evec
@@ -2598,9 +2584,9 @@ define <vscale x 4 x half> @vsitofp_nxv4i64_nxv4f16(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vsitofp_nxv4i64_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i64> %va to <vscale x 4 x half>
   ret <vscale x 4 x half> %evec
@@ -2610,9 +2596,9 @@ define <vscale x 4 x half> @vuitofp_nxv4i64_nxv4f16(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vuitofp_nxv4i64_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i64> %va to <vscale x 4 x half>
   ret <vscale x 4 x half> %evec
@@ -2622,8 +2608,7 @@ define <vscale x 4 x float> @vsitofp_nxv4i64_nxv4f32(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vsitofp_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 4 x i64> %va to <vscale x 4 x float>
   ret <vscale x 4 x float> %evec
@@ -2633,8 +2618,7 @@ define <vscale x 4 x float> @vuitofp_nxv4i64_nxv4f32(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vuitofp_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 4 x i64> %va to <vscale x 4 x float>
   ret <vscale x 4 x float> %evec
@@ -2664,9 +2648,9 @@ define <vscale x 8 x half> @vsitofp_nxv8i64_nxv8f16(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vsitofp_nxv8i64_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v16, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v16
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i64> %va to <vscale x 8 x half>
   ret <vscale x 8 x half> %evec
@@ -2676,9 +2660,9 @@ define <vscale x 8 x half> @vuitofp_nxv8i64_nxv8f16(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vuitofp_nxv8i64_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v16, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvt.f.f.w v8, v16
+; CHECK-NEXT:    vfncvt.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i64> %va to <vscale x 8 x half>
   ret <vscale x 8 x half> %evec
@@ -2688,8 +2672,7 @@ define <vscale x 8 x float> @vsitofp_nxv8i64_nxv8f32(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vsitofp_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = sitofp <vscale x 8 x i64> %va to <vscale x 8 x float>
   ret <vscale x 8 x float> %evec
@@ -2699,8 +2682,7 @@ define <vscale x 8 x float> @vuitofp_nxv8i64_nxv8f32(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vuitofp_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %evec = uitofp <vscale x 8 x i64> %va to <vscale x 8 x float>
   ret <vscale x 8 x float> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 1cbb980aebffc..ad49793549eb4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -986,16 +986,16 @@ define <vscale x 4 x i1> @vmadc_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmadc_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmadc.vx v10, v8, a0
+; NOVLOPT-NEXT:    vmadc.vx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmadc_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vmadc.vx v10, v8, a0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmadc.vx v8, v8, a0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmadc.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1006,16 +1006,16 @@ define <vscale x 4 x i1> @vmadc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmadc_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmadc.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmadc.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmadc_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmadc.vv v12, v8, v10
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmadc.vv v8, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmadc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1026,16 +1026,16 @@ define <vscale x 4 x i1> @vmsbc_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsbc_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsbc.vx v10, v8, a0
+; NOVLOPT-NEXT:    vmsbc.vx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsbc_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsbc.vx v10, v8, a0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmsbc.vx v8, v8, a0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsbc.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1046,16 +1046,16 @@ define <vscale x 4 x i1> @vmsbc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsbc_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsbc.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmsbc.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsbc_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsbc.vv v12, v8, v10
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmsbc.vv v8, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsbc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1066,16 +1066,16 @@ define <vscale x 4 x i16> @vnsrl_wi(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b
 ; NOVLOPT-LABEL: vnsrl_wi:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vnsrl.wi v11, v8, 5
+; NOVLOPT-NEXT:    vnsrl.wi v8, v8, 5
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v11, v10
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnsrl_wi:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; VLOPT-NEXT:    vnsrl.wi v11, v8, 5
-; VLOPT-NEXT:    vadd.vv v8, v11, v10
+; VLOPT-NEXT:    vnsrl.wi v8, v8, 5
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i16> @llvm.riscv.vnsrl.nxv4i16.nxv4i32(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1)
   %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
@@ -1086,16 +1086,16 @@ define <vscale x 4 x i16> @vnsrl_wx(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b
 ; NOVLOPT-LABEL: vnsrl_wx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vnsrl.wx v11, v8, a0
+; NOVLOPT-NEXT:    vnsrl.wx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v11, v10
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnsrl_wx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; VLOPT-NEXT:    vnsrl.wx v11, v8, a0
-; VLOPT-NEXT:    vadd.vv v8, v11, v10
+; VLOPT-NEXT:    vnsrl.wx v8, v8, a0
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i16> @llvm.riscv.vnsrl.nxv4i16.nxv4i32(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, iXLen %c, iXLen -1)
   %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
@@ -1106,16 +1106,16 @@ define <vscale x 4 x i16> @vnsrl_wv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b
 ; NOVLOPT-LABEL: vnsrl_wv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vnsrl.wv v12, v8, v11
+; NOVLOPT-NEXT:    vnsrl.wv v8, v8, v11
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v12, v10
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnsrl_wv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; VLOPT-NEXT:    vnsrl.wv v12, v8, v11
-; VLOPT-NEXT:    vadd.vv v8, v12, v10
+; VLOPT-NEXT:    vnsrl.wv v8, v8, v11
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i16> @llvm.riscv.vnsrl.nxv4i16.nxv4i32.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, <vscale x 4 x i16> %c, iXLen -1)
   %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
@@ -1126,16 +1126,16 @@ define <vscale x 4 x i16> @vnsra_wi(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b
 ; NOVLOPT-LABEL: vnsra_wi:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vnsra.wi v11, v8, 5
+; NOVLOPT-NEXT:    vnsra.wi v8, v8, 5
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v11, v10
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnsra_wi:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; VLOPT-NEXT:    vnsra.wi v11, v8, 5
-; VLOPT-NEXT:    vadd.vv v8, v11, v10
+; VLOPT-NEXT:    vnsra.wi v8, v8, 5
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i16> @llvm.riscv.vnsra.nxv4i16.nxv4i32(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1)
   %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
@@ -1146,16 +1146,16 @@ define <vscale x 4 x i16> @vnsra_wx(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b
 ; NOVLOPT-LABEL: vnsra_wx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vnsra.wx v11, v8, a0
+; NOVLOPT-NEXT:    vnsra.wx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v11, v10
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnsra_wx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; VLOPT-NEXT:    vnsra.wx v11, v8, a0
-; VLOPT-NEXT:    vadd.vv v8, v11, v10
+; VLOPT-NEXT:    vnsra.wx v8, v8, a0
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i16> @llvm.riscv.vnsra.nxv4i16.nxv4i32(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, iXLen %c, iXLen -1)
   %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
@@ -1166,16 +1166,16 @@ define <vscale x 4 x i16> @vnsra_wv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b
 ; NOVLOPT-LABEL: vnsra_wv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vnsra.wv v12, v8, v11
+; NOVLOPT-NEXT:    vnsra.wv v8, v8, v11
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v12, v10
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnsra_wv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; VLOPT-NEXT:    vnsra.wv v12, v8, v11
-; VLOPT-NEXT:    vadd.vv v8, v12, v10
+; VLOPT-NEXT:    vnsra.wv v8, v8, v11
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i16> @llvm.riscv.vnsra.nxv4i16.nxv4i32.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i32> %a, <vscale x 4 x i16> %c, iXLen -1)
   %2 = call <vscale x 4 x i16> @llvm.riscv.vadd.nxv4i16.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i16> %1, <vscale x 4 x i16> %b, iXLen %vl)
@@ -1186,16 +1186,16 @@ define <vscale x 4 x i1> @vmseq_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmseq_vi:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmseq.vi v10, v8, 5
+; NOVLOPT-NEXT:    vmseq.vi v8, v8, 5
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmseq_vi:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmseq.vi v10, v8, 5
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmseq.vi v8, v8, 5
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1206,16 +1206,16 @@ define <vscale x 4 x i1> @vmseq_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmseq_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmseq.vx v10, v8, a0
+; NOVLOPT-NEXT:    vmseq.vx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmseq_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vmseq.vx v10, v8, a0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmseq.vx v8, v8, a0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1226,16 +1226,16 @@ define <vscale x 4 x i1> @vmseq_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmseq_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmseq.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmseq.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmseq_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmseq.vv v12, v8, v10
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmseq.vv v8, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1246,16 +1246,16 @@ define <vscale x 4 x i1> @vmsne_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsne_vi:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsne.vi v10, v8, 5
+; NOVLOPT-NEXT:    vmsne.vi v8, v8, 5
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsne_vi:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsne.vi v10, v8, 5
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmsne.vi v8, v8, 5
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1266,16 +1266,16 @@ define <vscale x 4 x i1> @vmsne_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsne_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsne.vx v10, v8, a0
+; NOVLOPT-NEXT:    vmsne.vx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsne_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsne.vx v10, v8, a0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmsne.vx v8, v8, a0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1286,16 +1286,16 @@ define <vscale x 4 x i1> @vmsne_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsne_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsne.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmsne.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsne_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsne.vv v12, v8, v10
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmsne.vv v8, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1306,16 +1306,16 @@ define <vscale x 4 x i1> @vmsltu_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsltu_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsltu.vx v10, v8, a0
+; NOVLOPT-NEXT:    vmsltu.vx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsltu_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsltu.vx v10, v8, a0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmsltu.vx v8, v8, a0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1326,16 +1326,16 @@ define <vscale x 4 x i1> @vmsltu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsltu_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsltu.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmsltu.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsltu_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsltu.vv v12, v8, v10
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmsltu.vv v8, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1346,16 +1346,16 @@ define <vscale x 4 x i1> @vmslt_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmslt_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmslt.vx v10, v8, a0
+; NOVLOPT-NEXT:    vmslt.vx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmslt_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vmslt.vx v10, v8, a0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmslt.vx v8, v8, a0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1366,16 +1366,16 @@ define <vscale x 4 x i1> @vmslt_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmslt_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmslt.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmslt.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmslt_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmslt.vv v12, v8, v10
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmslt.vv v8, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1386,16 +1386,16 @@ define <vscale x 4 x i1> @vmsleu_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsleu_vi:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsleu.vi v10, v8, 5
+; NOVLOPT-NEXT:    vmsleu.vi v8, v8, 5
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsleu_vi:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsleu.vi v10, v8, 5
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmsleu.vi v8, v8, 5
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1406,16 +1406,16 @@ define <vscale x 4 x i1> @vmsleu_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsleu_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsleu.vx v10, v8, a0
+; NOVLOPT-NEXT:    vmsleu.vx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsleu_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsleu.vx v10, v8, a0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmsleu.vx v8, v8, a0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1426,16 +1426,16 @@ define <vscale x 4 x i1> @vmsleu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsleu_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsleu.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmsleu.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsleu_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsleu.vv v12, v8, v10
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmsleu.vv v8, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1446,16 +1446,16 @@ define <vscale x 4 x i1> @vmsle_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsle_vi:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsle.vi v10, v8, 5
+; NOVLOPT-NEXT:    vmsle.vi v8, v8, 5
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsle_vi:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsle.vi v10, v8, 5
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmsle.vi v8, v8, 5
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1466,16 +1466,16 @@ define <vscale x 4 x i1> @vmsle_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsle_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsle.vx v10, v8, a0
+; NOVLOPT-NEXT:    vmsle.vx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsle_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsle.vx v10, v8, a0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmsle.vx v8, v8, a0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1486,16 +1486,16 @@ define <vscale x 4 x i1> @vmsle_vv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsle_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsle.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmsle.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsle_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsle.vv v12, v8, v10
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmsle.vv v8, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1506,16 +1506,16 @@ define <vscale x 4 x i1> @vmsgtu_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsgtu_vi:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsgtu.vi v10, v8, 5
+; NOVLOPT-NEXT:    vmsgtu.vi v8, v8, 5
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsgtu_vi:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsgtu.vi v10, v8, 5
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmsgtu.vi v8, v8, 5
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1526,16 +1526,16 @@ define <vscale x 4 x i1> @vmsgtu_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsgtu_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsgtu.vx v10, v8, a0
+; NOVLOPT-NEXT:    vmsgtu.vx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsgtu_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsgtu.vx v10, v8, a0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmsgtu.vx v8, v8, a0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1546,16 +1546,16 @@ define <vscale x 4 x i1> @vmsgt_vi(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsgt_vi:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsgt.vi v10, v8, 5
+; NOVLOPT-NEXT:    vmsgt.vi v8, v8, 5
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsgt_vi:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsgt.vi v10, v8, 5
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmsgt.vi v8, v8, 5
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i32.i32(<vscale x 4 x i32> %a, i32 5, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -1566,16 +1566,16 @@ define <vscale x 4 x i1> @vmsgt_vx(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b,
 ; NOVLOPT-LABEL: vmsgt_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmsgt.vx v10, v8, a0
+; NOVLOPT-NEXT:    vmsgt.vx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmsgt_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vmsgt.vx v10, v8, a0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmsgt.vx v8, v8, a0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -2066,17 +2066,17 @@ define <vscale x 4 x i64> @vwmul_vv(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b
 ; NOVLOPT-LABEL: vwmul_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vwmul.vv v12, v8, v9
+; NOVLOPT-NEXT:    vwmul.vv v10, v8, v9
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vwmul.vv v8, v12, v12
+; NOVLOPT-NEXT:    vwmul.vv v8, v10, v10
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vwmul_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; VLOPT-NEXT:    vwmul.vv v12, v8, v9
+; VLOPT-NEXT:    vwmul.vv v10, v8, v9
 ; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT:    vwmul.vv v8, v12, v12
+; VLOPT-NEXT:    vwmul.vv v8, v10, v10
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i32> @llvm.riscv.vwmul.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b, iXLen -1)
   %2 = call <vscale x 4 x i64> @llvm.riscv.vwmul.nxv4i64.nxv4i32.nxv4i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
@@ -2087,17 +2087,17 @@ define <vscale x 4 x i64> @vwmul_vx(<vscale x 4 x i16> %a, i16 %b, i32 %c, iXLen
 ; NOVLOPT-LABEL: vwmul_vx:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vwmul.vx v12, v8, a0
+; NOVLOPT-NEXT:    vwmul.vx v10, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vwmul.vx v8, v12, a1
+; NOVLOPT-NEXT:    vwmul.vx v8, v10, a1
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vwmul_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
-; VLOPT-NEXT:    vwmul.vx v12, v8, a0
+; VLOPT-NEXT:    vwmul.vx v10, v8, a0
 ; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT:    vwmul.vx v8, v12, a1
+; VLOPT-NEXT:    vwmul.vx v8, v10, a1
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i32> @llvm.riscv.vwmul.nxv4i32.nxv4i16.i16(<vscale x 4 x i32> poison, <vscale x 4 x i16> %a, i16 %b, iXLen -1)
   %2 = call <vscale x 4 x i64> @llvm.riscv.vwmul.nxv4i64.nxv4i64.i32(<vscale x 4 x i64> poison, <vscale x 4 x i32> %1, i32 %c, iXLen %vl)
@@ -2892,17 +2892,17 @@ define <vscale x 4 x i32> @vnclipu_vv(<vscale x 4 x i64> %a, <vscale x 4 x i32>
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    csrwi vxrm, 0
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vnclipu.wv v14, v8, v12
+; NOVLOPT-NEXT:    vnclipu.wv v8, v8, v12
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v14, v14
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnclipu_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    csrwi vxrm, 0
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vnclipu.wv v14, v8, v12
-; VLOPT-NEXT:    vadd.vv v8, v14, v14
+; VLOPT-NEXT:    vnclipu.wv v8, v8, v12
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i32> @llvm.riscv.vnclipu(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
@@ -2914,17 +2914,17 @@ define <vscale x 4 x i32> @vnclipu_vx(<vscale x 4 x i64> %a, iXLen %b, iXLen %vl
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    csrwi vxrm, 0
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vnclipu.wx v12, v8, a0
+; NOVLOPT-NEXT:    vnclipu.wx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnclipu_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    csrwi vxrm, 0
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vnclipu.wx v12, v8, a0
-; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    vnclipu.wx v8, v8, a0
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i32> @llvm.riscv.vnclipu(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen %b, iXLen 0, iXLen -1)
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
@@ -2936,17 +2936,17 @@ define <vscale x 4 x i32> @vnclipu_vi(<vscale x 4 x i64> %a, iXLen %vl) {
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    csrwi vxrm, 0
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vnclipu.wi v12, v8, 5
+; NOVLOPT-NEXT:    vnclipu.wi v8, v8, 5
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnclipu_vi:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    csrwi vxrm, 0
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vnclipu.wi v12, v8, 5
-; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    vnclipu.wi v8, v8, 5
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i32> @llvm.riscv.vnclipu(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen 5, iXLen 0, iXLen -1)
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
@@ -2958,17 +2958,17 @@ define <vscale x 4 x i32> @vnclip_vv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    csrwi vxrm, 0
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vnclip.wv v14, v8, v12
+; NOVLOPT-NEXT:    vnclip.wv v8, v8, v12
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v14, v14
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnclip_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    csrwi vxrm, 0
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vnclip.wv v14, v8, v12
-; VLOPT-NEXT:    vadd.vv v8, v14, v14
+; VLOPT-NEXT:    vnclip.wv v8, v8, v12
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i32> @llvm.riscv.vnclip(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
@@ -2980,17 +2980,17 @@ define <vscale x 4 x i32> @vnclip_vx(<vscale x 4 x i64> %a, iXLen %b, iXLen %vl)
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    csrwi vxrm, 0
 ; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vnclip.wx v12, v8, a0
+; NOVLOPT-NEXT:    vnclip.wx v8, v8, a0
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnclip_vx:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    csrwi vxrm, 0
 ; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT:    vnclip.wx v12, v8, a0
-; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    vnclip.wx v8, v8, a0
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i32> @llvm.riscv.vnclip(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen %b, iXLen 0, iXLen -1)
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
@@ -3002,17 +3002,17 @@ define <vscale x 4 x i32> @vnclip_vi(<vscale x 4 x i64> %a, iXLen %vl) {
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    csrwi vxrm, 0
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vnclip.wi v12, v8, 5
+; NOVLOPT-NEXT:    vnclip.wi v8, v8, 5
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnclip_vi:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    csrwi vxrm, 0
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vnclip.wi v12, v8, 5
-; VLOPT-NEXT:    vadd.vv v8, v12, v12
+; VLOPT-NEXT:    vnclip.wi v8, v8, 5
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i32> @llvm.riscv.vnclip(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen 5, iXLen 0, iXLen -1)
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
@@ -3848,16 +3848,16 @@ define <vscale x 4 x i1> @vmfeq_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b
 ; NOVLOPT-LABEL: vmfeq_vf:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmfeq.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vmfeq.vf v8, v8, fa0
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmfeq_vf:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmfeq.vf v10, v8, fa0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmfeq.vf v8, v8, fa0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -3868,16 +3868,16 @@ define <vscale x 4 x i1> @vmfeq_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b
 ; NOVLOPT-LABEL: vmfeq_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmfeq.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmfeq.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmfeq_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmfeq.vv v12, v8, v10
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmfeq.vv v8, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -3888,16 +3888,16 @@ define <vscale x 4 x i1> @vmfne_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b
 ; NOVLOPT-LABEL: vmfne_vf:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmfne.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vmfne.vf v8, v8, fa0
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmfne_vf:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmfne.vf v10, v8, fa0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmfne.vf v8, v8, fa0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -3908,16 +3908,16 @@ define <vscale x 4 x i1> @vmfne_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b
 ; NOVLOPT-LABEL: vmfne_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmfne.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmfne.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmfne_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmfne.vv v12, v8, v10
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmfne.vv v8, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -3928,16 +3928,16 @@ define <vscale x 4 x i1> @vmflt_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b
 ; NOVLOPT-LABEL: vmflt_vf:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmflt.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vmflt.vf v8, v8, fa0
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmflt_vf:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmflt.vf v10, v8, fa0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmflt.vf v8, v8, fa0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -3948,16 +3948,16 @@ define <vscale x 4 x i1> @vmflt_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b
 ; NOVLOPT-LABEL: vmflt_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmflt.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmflt.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmflt_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmflt.vv v12, v8, v10
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmflt.vv v8, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -3968,16 +3968,16 @@ define <vscale x 4 x i1> @vmfle_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b
 ; NOVLOPT-LABEL: vmfle_vf:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmfle.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vmfle.vf v8, v8, fa0
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmfle_vf:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmfle.vf v10, v8, fa0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmfle.vf v8, v8, fa0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -3988,16 +3988,16 @@ define <vscale x 4 x i1> @vmfle_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b
 ; NOVLOPT-LABEL: vmfle_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmfle.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmfle.vv v8, v8, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmfle_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmfle.vv v12, v8, v10
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmfle.vv v8, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -4008,16 +4008,16 @@ define <vscale x 4 x i1> @vmfgt_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b
 ; NOVLOPT-LABEL: vmfgt_vf:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmfgt.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vmfgt.vf v8, v8, fa0
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmfgt_vf:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmfgt.vf v10, v8, fa0
-; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    vmfgt.vf v8, v8, fa0
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -4028,16 +4028,16 @@ define <vscale x 4 x i1> @vmfgt_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b
 ; NOVLOPT-LABEL: vmfgt_vv:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmflt.vv v12, v10, v8
+; NOVLOPT-NEXT:    vmflt.vv v8, v10, v8
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    vmand.mm v0, v8, v0
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vmfgt_vv:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT:    vmflt.vv v12, v10, v8
-; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    vmflt.vv v8, v10, v8
+; VLOPT-NEXT:    vmand.mm v0, v8, v0
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
@@ -5074,22 +5074,22 @@ define <vscale x 4 x double> @vfsqrt(<vscale x 4 x float> %a) {
 ; NOVLOPT-LABEL: vfsqrt:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmv2r.v v12, v8
+; NOVLOPT-NEXT:    vmv2r.v v10, v8
 ; NOVLOPT-NEXT:    fsrmi a0, 0
-; NOVLOPT-NEXT:    vfsqrt.v v14, v8
+; NOVLOPT-NEXT:    vfsqrt.v v12, v8
 ; NOVLOPT-NEXT:    fsrm a0
 ; NOVLOPT-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vfwmacc.vv v8, v12, v14
+; NOVLOPT-NEXT:    vfwmacc.vv v8, v10, v12
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vfsqrt:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; VLOPT-NEXT:    vmv2r.v v12, v8
+; VLOPT-NEXT:    vmv2r.v v10, v8
 ; VLOPT-NEXT:    fsrmi a0, 0
-; VLOPT-NEXT:    vfsqrt.v v14, v8
+; VLOPT-NEXT:    vfsqrt.v v12, v8
 ; VLOPT-NEXT:    fsrm a0
-; VLOPT-NEXT:    vfwmacc.vv v8, v12, v14
+; VLOPT-NEXT:    vfwmacc.vv v8, v10, v12
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 0, iXLen 7)
   %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0)
@@ -5100,18 +5100,18 @@ define <vscale x 4 x double> @vfrsqrt7(<vscale x 4 x float> %a) {
 ; NOVLOPT-LABEL: vfrsqrt7:
 ; NOVLOPT:       # %bb.0:
 ; NOVLOPT-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vmv2r.v v12, v8
-; NOVLOPT-NEXT:    vfrsqrt7.v v14, v8
+; NOVLOPT-NEXT:    vmv2r.v v10, v8
+; NOVLOPT-NEXT:    vfrsqrt7.v v12, v8
 ; NOVLOPT-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vfwmacc.vv v8, v12, v14
+; NOVLOPT-NEXT:    vfwmacc.vv v8, v10, v12
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vfrsqrt7:
 ; VLOPT:       # %bb.0:
 ; VLOPT-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; VLOPT-NEXT:    vmv2r.v v12, v8
-; VLOPT-NEXT:    vfrsqrt7.v v14, v8
-; VLOPT-NEXT:    vfwmacc.vv v8, v12, v14
+; VLOPT-NEXT:    vmv2r.v v10, v8
+; VLOPT-NEXT:    vfrsqrt7.v v12, v8
+; VLOPT-NEXT:    vfwmacc.vv v8, v10, v12
 ; VLOPT-NEXT:    ret
   %1 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 7)
   %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
index 5d28534972b3f..7ecff4f392908 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
@@ -14,8 +14,7 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i64(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vloxei64.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64(
@@ -62,8 +61,7 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i64(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vloxei64.v v10, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64(
@@ -110,8 +108,7 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i64(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vloxei64.v v12, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v12
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64(
@@ -158,8 +155,7 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i64(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vloxei64.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64(
@@ -206,8 +202,7 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vloxei64.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64(
@@ -254,8 +249,7 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vloxei64.v v10, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64(
@@ -302,8 +296,7 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vloxei64.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64(
@@ -350,8 +343,7 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vloxei64.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64(
@@ -398,8 +390,7 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vloxei64.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64(
@@ -446,8 +437,7 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vloxei64.v v10, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64(
@@ -494,8 +484,7 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vloxei64.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
@@ -542,8 +531,7 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vloxei64.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64(
@@ -778,8 +766,7 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i64(ptr %0, <
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vloxei64.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i64(
@@ -826,8 +813,7 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i64(ptr %0, <
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vloxei64.v v10, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i64(
@@ -874,8 +860,7 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i64(ptr %0, <
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vloxei64.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i64(
@@ -922,8 +907,7 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i64(ptr %0, <
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vloxei64.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i64(
@@ -970,8 +954,7 @@ define <vscale x 1 x bfloat> @intrinsic_vloxei_v_nxv1bf16_nxv1bf16_nxv1i64(ptr %
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1bf16_nxv1bf16_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vloxei64.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x bfloat> @llvm.riscv.vloxei.nxv1bf16.nxv1i64(
@@ -1018,8 +1001,7 @@ define <vscale x 2 x bfloat> @intrinsic_vloxei_v_nxv2bf16_nxv2bf16_nxv2i64(ptr %
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2bf16_nxv2bf16_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vloxei64.v v10, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x bfloat> @llvm.riscv.vloxei.nxv2bf16.nxv2i64(
@@ -1066,8 +1048,7 @@ define <vscale x 4 x bfloat> @intrinsic_vloxei_v_nxv4bf16_nxv4bf16_nxv4i64(ptr %
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4bf16_nxv4bf16_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vloxei64.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x bfloat> @llvm.riscv.vloxei.nxv4bf16.nxv4i64(
@@ -1114,8 +1095,7 @@ define <vscale x 8 x bfloat> @intrinsic_vloxei_v_nxv8bf16_nxv8bf16_nxv8i64(ptr %
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8bf16_nxv8bf16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vloxei64.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x bfloat> @llvm.riscv.vloxei.nxv8bf16.nxv8i64(
@@ -1162,8 +1142,7 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i64(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vloxei64.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i64(
@@ -1210,8 +1189,7 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i64(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vloxei64.v v10, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i64(
@@ -1258,8 +1236,7 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i64(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vloxei64.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i64(
@@ -1306,8 +1283,7 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i64(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vloxei64.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vloxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei.ll
index 03b4268975d4c..fdd4cc6442a22 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxei.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxei.ll
@@ -14,8 +14,7 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i32(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vloxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32(
@@ -62,8 +61,7 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i32(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vloxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32(
@@ -110,8 +108,7 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i32(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vloxei32.v v10, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32(
@@ -158,8 +155,7 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i32(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vloxei32.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32(
@@ -206,8 +202,7 @@ define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i32(ptr %0, <
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vloxei32.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32(
@@ -254,8 +249,7 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vloxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32(
@@ -302,8 +296,7 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vloxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32(
@@ -350,8 +343,7 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vloxei32.v v10, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32(
@@ -398,8 +390,7 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vloxei32.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32(
@@ -446,8 +437,7 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i32(ptr %0
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vloxei32.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32(
@@ -777,8 +767,8 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vloxei32.v v8, (a0), v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vloxei32.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32(
@@ -825,8 +815,8 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vloxei32.v v8, (a0), v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vloxei32.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32(
@@ -873,8 +863,8 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vloxei32.v v8, (a0), v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vloxei32.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32(
@@ -921,8 +911,7 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i32(ptr %0, <
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vloxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i32(
@@ -969,8 +958,7 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i32(ptr %0, <
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vloxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i32(
@@ -1017,8 +1005,7 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i32(ptr %0, <
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vloxei32.v v10, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i32(
@@ -1065,8 +1052,7 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i32(ptr %0, <
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vloxei32.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i32(
@@ -1113,8 +1099,7 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i32(ptr %
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vloxei32.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i32(
@@ -1444,8 +1429,8 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i32(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vloxei32.v v8, (a0), v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vloxei32.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32(
@@ -1492,8 +1477,8 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i32(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vloxei32.v v8, (a0), v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vloxei32.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32(
@@ -1540,8 +1525,8 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i32(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vloxei32.v v8, (a0), v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vloxei32.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32(
@@ -1588,8 +1573,7 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i16(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vloxei16.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei16.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16(
@@ -1636,8 +1620,7 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i16(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vloxei16.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei16.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16(
@@ -1684,8 +1667,7 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i16(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vloxei16.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei16.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16(
@@ -1732,8 +1714,7 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i16(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vloxei16.v v10, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vloxei16.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16(
@@ -1780,8 +1761,7 @@ define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i16(ptr %0, <
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vloxei16.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vloxei16.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16(
@@ -1828,8 +1808,7 @@ define <vscale x 32 x i8> @intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i16(ptr %0, <
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vloxei16.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vloxei16.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16(
@@ -2254,8 +2233,8 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i16(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vloxei16.v v8, (a0), v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vloxei16.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
@@ -2302,8 +2281,8 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i16(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vloxei16.v v8, (a0), v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vloxei16.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16(
@@ -2350,8 +2329,8 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i16(ptr %0
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vloxei16.v v8, (a0), v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vloxei16.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16(
@@ -2494,8 +2473,8 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i16(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vloxei16.v v8, (a0), v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vloxei16.v v8, (a0), v11
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16(
@@ -2542,8 +2521,8 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i16(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vloxei16.v v8, (a0), v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vloxei16.v v8, (a0), v14
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16(
@@ -2968,8 +2947,8 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i16(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vloxei16.v v8, (a0), v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vloxei16.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16(
@@ -3016,8 +2995,8 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i16(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vloxei16.v v8, (a0), v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vloxei16.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16(
@@ -3064,8 +3043,8 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i16(ptr
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vloxei16.v v8, (a0), v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vloxei16.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16(
@@ -3208,8 +3187,8 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i16(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vloxei16.v v8, (a0), v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vloxei16.v v8, (a0), v11
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16(
@@ -3256,8 +3235,8 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i16(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vloxei16.v v8, (a0), v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vloxei16.v v8, (a0), v14
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16(
@@ -3777,8 +3756,8 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i8(ptr %0, <vs
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vloxei8.v v8, (a0), v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vloxei8.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8(
@@ -3825,8 +3804,8 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i8(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vloxei8.v v8, (a0), v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vloxei8.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8(
@@ -3873,8 +3852,8 @@ define <vscale x 32 x i16> @intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i8(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vloxei8.v v8, (a0), v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vloxei8.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8(
@@ -4065,8 +4044,8 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i8(ptr %0, <vs
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vloxei8.v v8, (a0), v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vloxei8.v v8, (a0), v11
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8(
@@ -4113,8 +4092,8 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i8(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vloxei8.v v8, (a0), v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vloxei8.v v8, (a0), v14
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8(
@@ -4305,8 +4284,8 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i8(ptr %0, <vs
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v8
-; CHECK-NEXT:    vloxei8.v v8, (a0), v16
+; CHECK-NEXT:    vmv1r.v v15, v8
+; CHECK-NEXT:    vloxei8.v v8, (a0), v15
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8(
@@ -4497,8 +4476,8 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i8(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vloxei8.v v8, (a0), v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vloxei8.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i8(
@@ -4545,8 +4524,8 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i8(ptr %0
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vloxei8.v v8, (a0), v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vloxei8.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i8(
@@ -4593,8 +4572,8 @@ define <vscale x 32 x half> @intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i8(ptr %0
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vloxei8.v v8, (a0), v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vloxei8.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i8(
@@ -4641,8 +4620,7 @@ define <vscale x 1 x bfloat> @intrinsic_vloxei_v_nxv1bf16_nxv1bf16_nxv1i32(ptr %
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1bf16_nxv1bf16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vloxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x bfloat> @llvm.riscv.vloxei.nxv1bf16.nxv1i32(
@@ -4689,8 +4667,7 @@ define <vscale x 2 x bfloat> @intrinsic_vloxei_v_nxv2bf16_nxv2bf16_nxv2i32(ptr %
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2bf16_nxv2bf16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vloxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x bfloat> @llvm.riscv.vloxei.nxv2bf16.nxv2i32(
@@ -4737,8 +4714,7 @@ define <vscale x 4 x bfloat> @intrinsic_vloxei_v_nxv4bf16_nxv4bf16_nxv4i32(ptr %
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4bf16_nxv4bf16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vloxei32.v v10, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x bfloat> @llvm.riscv.vloxei.nxv4bf16.nxv4i32(
@@ -4785,8 +4761,7 @@ define <vscale x 8 x bfloat> @intrinsic_vloxei_v_nxv8bf16_nxv8bf16_nxv8i32(ptr %
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8bf16_nxv8bf16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vloxei32.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x bfloat> @llvm.riscv.vloxei.nxv8bf16.nxv8i32(
@@ -4833,8 +4808,7 @@ define <vscale x 16 x bfloat> @intrinsic_vloxei_v_nxv16bf16_nxv16bf16_nxv16i32(p
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16bf16_nxv16bf16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vloxei32.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vloxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x bfloat> @llvm.riscv.vloxei.nxv16bf16.nxv16i32(
@@ -5025,8 +4999,8 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i8(ptr %0, <
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vloxei8.v v8, (a0), v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vloxei8.v v8, (a0), v11
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8(
@@ -5073,8 +5047,8 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i8(ptr %
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vloxei8.v v8, (a0), v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vloxei8.v v8, (a0), v14
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8(
@@ -5265,8 +5239,8 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i8(ptr %0,
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v8
-; CHECK-NEXT:    vloxei8.v v8, (a0), v16
+; CHECK-NEXT:    vmv1r.v v15, v8
+; CHECK-NEXT:    vloxei8.v v8, (a0), v15
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
index b102a12dea9a9..82e3b03cfb0d9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
@@ -14,8 +14,7 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i64(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vluxei64.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64(
@@ -62,8 +61,7 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i64(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vluxei64.v v10, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64(
@@ -110,8 +108,7 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i64(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vluxei64.v v12, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v12
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64(
@@ -158,8 +155,7 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i64(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vluxei64.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64(
@@ -206,8 +202,7 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vluxei64.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64(
@@ -254,8 +249,7 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vluxei64.v v10, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64(
@@ -302,8 +296,7 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vluxei64.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64(
@@ -350,8 +343,7 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vluxei64.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64(
@@ -398,8 +390,7 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vluxei64.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64(
@@ -446,8 +437,7 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vluxei64.v v10, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(
@@ -494,8 +484,7 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vluxei64.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64(
@@ -542,8 +531,7 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i64(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vluxei64.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64(
@@ -778,8 +766,7 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i64(ptr %0, <
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vluxei64.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i64(
@@ -826,8 +813,7 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i64(ptr %0, <
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vluxei64.v v10, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i64(
@@ -874,8 +860,7 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i64(ptr %0, <
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vluxei64.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i64(
@@ -922,8 +907,7 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i64(ptr %0, <
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vluxei64.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i64(
@@ -970,8 +954,7 @@ define <vscale x 1 x bfloat> @intrinsic_vluxei_v_nxv1bf16_nxv1bf16_nxv1i64(ptr %
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1bf16_nxv1bf16_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vluxei64.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x bfloat> @llvm.riscv.vluxei.nxv1bf16.nxv1i64(
@@ -1018,8 +1001,7 @@ define <vscale x 2 x bfloat> @intrinsic_vluxei_v_nxv2bf16_nxv2bf16_nxv2i64(ptr %
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2bf16_nxv2bf16_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vluxei64.v v10, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x bfloat> @llvm.riscv.vluxei.nxv2bf16.nxv2i64(
@@ -1066,8 +1048,7 @@ define <vscale x 4 x bfloat> @intrinsic_vluxei_v_nxv4bf16_nxv4bf16_nxv4i64(ptr %
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4bf16_nxv4bf16_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vluxei64.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x bfloat> @llvm.riscv.vluxei.nxv4bf16.nxv4i64(
@@ -1114,8 +1095,7 @@ define <vscale x 8 x bfloat> @intrinsic_vluxei_v_nxv8bf16_nxv8bf16_nxv8i64(ptr %
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8bf16_nxv8bf16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vluxei64.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x bfloat> @llvm.riscv.vluxei.nxv8bf16.nxv8i64(
@@ -1162,8 +1142,7 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i64(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vluxei64.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i64(
@@ -1210,8 +1189,7 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i64(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vluxei64.v v10, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i64(
@@ -1258,8 +1236,7 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i64(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vluxei64.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i64(
@@ -1306,8 +1283,7 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i64(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vluxei64.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vluxei64.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei.ll
index 13c9f3304183e..b2cbd116f5389 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxei.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxei.ll
@@ -14,8 +14,7 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i32(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vluxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32(
@@ -62,8 +61,7 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i32(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vluxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32(
@@ -110,8 +108,7 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i32(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vluxei32.v v10, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32(
@@ -158,8 +155,7 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i32(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vluxei32.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32(
@@ -206,8 +202,7 @@ define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i32(ptr %0, <
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vluxei32.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32(
@@ -254,8 +249,7 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vluxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32(
@@ -302,8 +296,7 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vluxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32(
@@ -350,8 +343,7 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vluxei32.v v10, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32(
@@ -398,8 +390,7 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vluxei32.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32(
@@ -446,8 +437,7 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i32(ptr %0
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vluxei32.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32(
@@ -777,8 +767,8 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vluxei32.v v8, (a0), v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vluxei32.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32(
@@ -825,8 +815,8 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vluxei32.v v8, (a0), v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vluxei32.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32(
@@ -873,8 +863,8 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i32(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vluxei32.v v8, (a0), v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vluxei32.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32(
@@ -921,8 +911,7 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i32(ptr %0, <
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vluxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i32(
@@ -969,8 +958,7 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i32(ptr %0, <
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vluxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i32(
@@ -1017,8 +1005,7 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i32(ptr %0, <
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vluxei32.v v10, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i32(
@@ -1065,8 +1052,7 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i32(ptr %0, <
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vluxei32.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i32(
@@ -1113,8 +1099,7 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i32(ptr %
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vluxei32.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i32(
@@ -1161,8 +1146,7 @@ define <vscale x 1 x bfloat> @intrinsic_vluxei_v_nxv1bf16_nxv1bf16_nxv1i32(ptr %
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1bf16_nxv1bf16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vluxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x bfloat> @llvm.riscv.vluxei.nxv1bf16.nxv1i32(
@@ -1209,8 +1193,7 @@ define <vscale x 2 x bfloat> @intrinsic_vluxei_v_nxv2bf16_nxv2bf16_nxv2i32(ptr %
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2bf16_nxv2bf16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vluxei32.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x bfloat> @llvm.riscv.vluxei.nxv2bf16.nxv2i32(
@@ -1257,8 +1240,7 @@ define <vscale x 4 x bfloat> @intrinsic_vluxei_v_nxv4bf16_nxv4bf16_nxv4i32(ptr %
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4bf16_nxv4bf16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vluxei32.v v10, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x bfloat> @llvm.riscv.vluxei.nxv4bf16.nxv4i32(
@@ -1305,8 +1287,7 @@ define <vscale x 8 x bfloat> @intrinsic_vluxei_v_nxv8bf16_nxv8bf16_nxv8i32(ptr %
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8bf16_nxv8bf16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vluxei32.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x bfloat> @llvm.riscv.vluxei.nxv8bf16.nxv8i32(
@@ -1353,8 +1334,7 @@ define <vscale x 16 x bfloat> @intrinsic_vluxei_v_nxv16bf16_nxv16bf16_nxv16i32(p
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16bf16_nxv16bf16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vluxei32.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vluxei32.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x bfloat> @llvm.riscv.vluxei.nxv16bf16.nxv16i32(
@@ -1684,8 +1664,8 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i32(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vluxei32.v v8, (a0), v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vluxei32.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32(
@@ -1732,8 +1712,8 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i32(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vluxei32.v v8, (a0), v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vluxei32.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32(
@@ -1780,8 +1760,8 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i32(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vluxei32.v v8, (a0), v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vluxei32.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32(
@@ -1828,8 +1808,7 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i16(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vluxei16.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei16.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16(
@@ -1876,8 +1855,7 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i16(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vluxei16.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei16.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16(
@@ -1924,8 +1902,7 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i16(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vluxei16.v v9, (a0), v8
-; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vluxei16.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16(
@@ -1972,8 +1949,7 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i16(ptr %0, <vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vluxei16.v v10, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vluxei16.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16(
@@ -2020,8 +1996,7 @@ define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i16(ptr %0, <
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vluxei16.v v12, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vluxei16.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16(
@@ -2068,8 +2043,7 @@ define <vscale x 32 x i8> @intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i16(ptr %0, <
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vluxei16.v v16, (a0), v8
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vluxei16.v v8, (a0), v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16(
@@ -2494,8 +2468,8 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i16(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vluxei16.v v8, (a0), v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vluxei16.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16(
@@ -2542,8 +2516,8 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i16(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vluxei16.v v8, (a0), v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vluxei16.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16(
@@ -2590,8 +2564,8 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i16(ptr %0
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vluxei16.v v8, (a0), v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vluxei16.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16(
@@ -2734,8 +2708,8 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i16(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vluxei16.v v8, (a0), v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vluxei16.v v8, (a0), v11
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16(
@@ -2782,8 +2756,8 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i16(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vluxei16.v v8, (a0), v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vluxei16.v v8, (a0), v14
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16(
@@ -3208,8 +3182,8 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i16(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vluxei16.v v8, (a0), v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vluxei16.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16(
@@ -3256,8 +3230,8 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i16(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vluxei16.v v8, (a0), v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vluxei16.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16(
@@ -3304,8 +3278,8 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i16(ptr
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vluxei16.v v8, (a0), v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vluxei16.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16(
@@ -3448,8 +3422,8 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i16(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vluxei16.v v8, (a0), v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vluxei16.v v8, (a0), v11
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16(
@@ -3496,8 +3470,8 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i16(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vluxei16.v v8, (a0), v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vluxei16.v v8, (a0), v14
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16(
@@ -4017,8 +3991,8 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i8(ptr %0, <vs
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vluxei8.v v8, (a0), v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vluxei8.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8(
@@ -4065,8 +4039,8 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i8(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vluxei8.v v8, (a0), v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vluxei8.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8(
@@ -4113,8 +4087,8 @@ define <vscale x 32 x i16> @intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i8(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vluxei8.v v8, (a0), v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vluxei8.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8(
@@ -4305,8 +4279,8 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i8(ptr %0, <vs
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vluxei8.v v8, (a0), v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vluxei8.v v8, (a0), v11
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8(
@@ -4353,8 +4327,8 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i8(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vluxei8.v v8, (a0), v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vluxei8.v v8, (a0), v14
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8(
@@ -4545,8 +4519,8 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i8(ptr %0, <vs
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v8
-; CHECK-NEXT:    vluxei8.v v8, (a0), v16
+; CHECK-NEXT:    vmv1r.v v15, v8
+; CHECK-NEXT:    vluxei8.v v8, (a0), v15
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8(
@@ -4737,8 +4711,8 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i8(ptr %0, <v
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vluxei8.v v8, (a0), v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vluxei8.v v8, (a0), v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i8(
@@ -4785,8 +4759,8 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i8(ptr %0
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vluxei8.v v8, (a0), v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vluxei8.v v8, (a0), v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i8(
@@ -4833,8 +4807,8 @@ define <vscale x 32 x half> @intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i8(ptr %0
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vluxei8.v v8, (a0), v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vluxei8.v v8, (a0), v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i8(
@@ -5025,8 +4999,8 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i8(ptr %0, <
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vluxei8.v v8, (a0), v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vluxei8.v v8, (a0), v11
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8(
@@ -5073,8 +5047,8 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i8(ptr %
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vluxei8.v v8, (a0), v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vluxei8.v v8, (a0), v14
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8(
@@ -5265,8 +5239,8 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i8(ptr %0,
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v8
-; CHECK-NEXT:    vluxei8.v v8, (a0), v16
+; CHECK-NEXT:    vmv1r.v v15, v8
+; CHECK-NEXT:    vluxei8.v v8, (a0), v15
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll
index 31c12db79a946..b785ff8600ab3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll
@@ -14,8 +14,7 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vvm_nxv1i1_nxv1i8_nxv1i8(<vsc
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv1i1_nxv1i8_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vmadc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i8.nxv1i8(
@@ -37,8 +36,7 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vvm_nxv2i1_nxv2i8_nxv2i8(<vsc
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv2i1_nxv2i8_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT:    vmadc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i8.nxv2i8(
@@ -60,8 +58,7 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vvm_nxv4i1_nxv4i8_nxv4i8(<vsc
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv4i1_nxv4i8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT:    vmadc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i8.nxv4i8(
@@ -83,8 +80,7 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vvm_nxv8i1_nxv8i8_nxv8i8(<vsc
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv8i1_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmadc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmadc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i8.nxv8i8(
@@ -106,8 +102,7 @@ define <vscale x 16 x i1> @intrinsic_vmadc.carry.in_vvm_nxv16i1_nxv16i8_nxv16i8(
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv16i1_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmadc.vvm v12, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmadc.vvm v0, v8, v10, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmadc.carry.in.nxv16i8.nxv16i8(
@@ -129,8 +124,7 @@ define <vscale x 32 x i1> @intrinsic_vmadc.carry.in_vvm_nxv32i1_nxv32i8_nxv32i8(
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv32i1_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmadc.vvm v16, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmadc.vvm v0, v8, v12, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmadc.carry.in.nxv32i8.nxv32i8(
@@ -152,8 +146,7 @@ define <vscale x 64 x i1> @intrinsic_vmadc.carry.in_vvm_nxv64i1_nxv64i8_nxv64i8(
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv64i1_nxv64i8_nxv64i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT:    vmadc.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmadc.vvm v0, v8, v16, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmadc.carry.in.nxv64i8.nxv64i8(
@@ -175,8 +168,7 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vvm_nxv1i1_nxv1i16_nxv1i16(<v
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv1i1_nxv1i16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmadc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i16.nxv1i16(
@@ -198,8 +190,7 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vvm_nxv2i1_nxv2i16_nxv2i16(<v
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv2i1_nxv2i16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vmadc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i16.nxv2i16(
@@ -221,8 +212,7 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vvm_nxv4i1_nxv4i16_nxv4i16(<v
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv4i1_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmadc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmadc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i16.nxv4i16(
@@ -244,8 +234,7 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vvm_nxv8i1_nxv8i16_nxv8i16(<v
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv8i1_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmadc.vvm v12, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmadc.vvm v0, v8, v10, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i16.nxv8i16(
@@ -267,8 +256,7 @@ define <vscale x 16 x i1> @intrinsic_vmadc.carry.in_vvm_nxv16i1_nxv16i16_nxv16i1
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv16i1_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmadc.vvm v16, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmadc.vvm v0, v8, v12, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmadc.carry.in.nxv16i16.nxv16i16(
@@ -290,8 +278,7 @@ define <vscale x 32 x i1> @intrinsic_vmadc.carry.in_vvm_nxv32i1_nxv32i16_nxv32i1
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv32i1_nxv32i16_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT:    vmadc.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmadc.vvm v0, v8, v16, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmadc.carry.in.nxv32i16.nxv32i16(
@@ -313,8 +300,7 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vvm_nxv1i1_nxv1i32_nxv1i32(<v
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv1i1_nxv1i32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vmadc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i32.nxv1i32(
@@ -336,8 +322,7 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vvm_nxv2i1_nxv2i32_nxv2i32(<v
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv2i1_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmadc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmadc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i32.nxv2i32(
@@ -359,8 +344,7 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vvm_nxv4i1_nxv4i32_nxv4i32(<v
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv4i1_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmadc.vvm v12, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmadc.vvm v0, v8, v10, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i32.nxv4i32(
@@ -382,8 +366,7 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vvm_nxv8i1_nxv8i32_nxv8i32(<v
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv8i1_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmadc.vvm v16, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmadc.vvm v0, v8, v12, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i32.nxv8i32(
@@ -405,8 +388,7 @@ define <vscale x 16 x i1> @intrinsic_vmadc.carry.in_vvm_nxv16i1_nxv16i32_nxv16i3
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv16i1_nxv16i32_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmadc.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmadc.vvm v0, v8, v16, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmadc.carry.in.nxv16i32.nxv16i32(
@@ -428,8 +410,7 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vvm_nxv1i1_nxv1i64_nxv1i64(<v
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv1i1_nxv1i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vmadc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmadc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i64.nxv1i64(
@@ -451,8 +432,7 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vvm_nxv2i1_nxv2i64_nxv2i64(<v
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv2i1_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmadc.vvm v12, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmadc.vvm v0, v8, v10, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i64.nxv2i64(
@@ -474,8 +454,7 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vvm_nxv4i1_nxv4i64_nxv4i64(<v
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv4i1_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmadc.vvm v16, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmadc.vvm v0, v8, v12, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i64.nxv4i64(
@@ -497,8 +476,7 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vvm_nxv8i1_nxv8i64_nxv8i64(<v
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv8i1_nxv8i64_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmadc.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmadc.vvm v0, v8, v16, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i64.nxv8i64(
@@ -520,8 +498,7 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmadc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i8.i8(
@@ -543,8 +520,7 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vmadc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i8.i8(
@@ -566,8 +542,7 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vmadc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i8.i8(
@@ -589,8 +564,7 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vmadc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i8.i8(
@@ -612,8 +586,7 @@ define <vscale x 16 x i1> @intrinsic_vmadc.carry.in_vxm_nxv16i1_nxv16i8_i8(<vsca
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv16i1_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmadc.vxm v10, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmadc.carry.in.nxv16i8.i8(
@@ -635,8 +608,7 @@ define <vscale x 32 x i1> @intrinsic_vmadc.carry.in_vxm_nxv32i1_nxv32i8_i8(<vsca
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv32i1_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmadc.vxm v12, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmadc.carry.in.nxv32i8.i8(
@@ -658,8 +630,7 @@ define <vscale x 64 x i1> @intrinsic_vmadc.carry.in_vxm_nxv64i1_nxv64i8_i8(<vsca
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv64i1_nxv64i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT:    vmadc.vxm v16, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmadc.carry.in.nxv64i8.i8(
@@ -681,8 +652,7 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i16_i16(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vmadc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i16.i16(
@@ -704,8 +674,7 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i16_i16(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vmadc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i16.i16(
@@ -727,8 +696,7 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i16_i16(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vmadc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i16.i16(
@@ -750,8 +718,7 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i16_i16(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmadc.vxm v10, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i16.i16(
@@ -773,8 +740,7 @@ define <vscale x 16 x i1> @intrinsic_vmadc.carry.in_vxm_nxv16i1_nxv16i16_i16(<vs
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv16i1_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmadc.vxm v12, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmadc.carry.in.nxv16i16.i16(
@@ -796,8 +762,7 @@ define <vscale x 32 x i1> @intrinsic_vmadc.carry.in_vxm_nxv32i1_nxv32i16_i16(<vs
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv32i1_nxv32i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT:    vmadc.vxm v16, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmadc.carry.in.nxv32i16.i16(
@@ -819,8 +784,7 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i32_i32(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmadc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i32.i32(
@@ -842,8 +806,7 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i32_i32(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmadc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i32.i32(
@@ -865,8 +828,7 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i32_i32(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmadc.vxm v10, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i32.i32(
@@ -888,8 +850,7 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i32_i32(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmadc.vxm v12, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i32.i32(
@@ -911,8 +872,7 @@ define <vscale x 16 x i1> @intrinsic_vmadc.carry.in_vxm_nxv16i1_nxv16i32_i32(<vs
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv16i1_nxv16i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmadc.vxm v16, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmadc.carry.in.nxv16i32.i32(
@@ -938,17 +898,15 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i64_i64(<vscal
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vmadc.vvm v9, v8, v10, v0
-; RV32-NEXT:    vmv.v.v v0, v9
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vmadc.vvm v0, v8, v9, v0
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; RV64-NEXT:    vmadc.vxm v9, v8, a0, v0
-; RV64-NEXT:    vmv.v.v v0, v9
+; RV64-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i64.i64(
@@ -974,17 +932,15 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i64_i64(<vscal
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmadc.vvm v10, v8, v12, v0
-; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vmadc.vvm v0, v8, v10, v0
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; RV64-NEXT:    vmadc.vxm v10, v8, a0, v0
-; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i64.i64(
@@ -1010,17 +966,15 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i64_i64(<vscal
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmadc.vvm v12, v8, v16, v0
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vmadc.vvm v0, v8, v12, v0
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmadc.vxm v12, v8, a0, v0
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i64.i64(
@@ -1046,17 +1000,15 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i64_i64(<vscal
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmadc.vvm v16, v8, v24, v0
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmadc.vvm v0, v8, v16, v0
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmadc.vxm v16, v8, a0, v0
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmadc.vxm v0, v8, a0, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i64.i64(
@@ -1072,8 +1024,7 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vim_nxv1i1_nxv1i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv1i1_nxv1i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vmadc.vim v9, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i8.i8(
@@ -1089,8 +1040,7 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vim_nxv2i1_nxv2i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv2i1_nxv2i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT:    vmadc.vim v9, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i8.i8(
@@ -1106,8 +1056,7 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vim_nxv4i1_nxv4i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv4i1_nxv4i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT:    vmadc.vim v9, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i8.i8(
@@ -1123,8 +1072,7 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vim_nxv8i1_nxv8i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv8i1_nxv8i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmadc.vim v9, v8, 9, v0
-; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i8.i8(
@@ -1140,8 +1088,7 @@ define <vscale x 16 x i1> @intrinsic_vmadc.carry.in_vim_nxv16i1_nxv16i8_i8(<vsca
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv16i1_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmadc.vim v10, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmadc.carry.in.nxv16i8.i8(
@@ -1157,8 +1104,7 @@ define <vscale x 32 x i1> @intrinsic_vmadc.carry.in_vim_nxv32i1_nxv32i8_i8(<vsca
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv32i1_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmadc.vim v12, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmadc.carry.in.nxv32i8.i8(
@@ -1174,8 +1120,7 @@ define <vscale x 64 x i1> @intrinsic_vmadc.carry.in_vim_nxv64i1_nxv64i8_i8(<vsca
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv64i1_nxv64i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT:    vmadc.vim v16, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmadc.carry.in.nxv64i8.i8(
@@ -1191,8 +1136,7 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vim_nxv1i1_nxv1i16_i16(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv1i1_nxv1i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmadc.vim v9, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i16.i16(
@@ -1208,8 +1152,7 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vim_nxv2i1_nxv2i16_i16(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv2i1_nxv2i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vmadc.vim v9, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i16.i16(
@@ -1225,8 +1168,7 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vim_nxv4i1_nxv4i16_i16(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv4i1_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmadc.vim v9, v8, 9, v0
-; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i16.i16(
@@ -1242,8 +1184,7 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vim_nxv8i1_nxv8i16_i16(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv8i1_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmadc.vim v10, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i16.i16(
@@ -1259,8 +1200,7 @@ define <vscale x 16 x i1> @intrinsic_vmadc.carry.in_vim_nxv16i1_nxv16i16_i16(<vs
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv16i1_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmadc.vim v12, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmadc.carry.in.nxv16i16.i16(
@@ -1276,8 +1216,7 @@ define <vscale x 32 x i1> @intrinsic_vmadc.carry.in_vim_nxv32i1_nxv32i16_i16(<vs
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv32i1_nxv32i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT:    vmadc.vim v16, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmadc.carry.in.nxv32i16.i16(
@@ -1293,8 +1232,7 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vim_nxv1i1_nxv1i32_i32(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv1i1_nxv1i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vmadc.vim v9, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i32.i32(
@@ -1310,8 +1248,7 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vim_nxv2i1_nxv2i32_i32(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv2i1_nxv2i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmadc.vim v9, v8, 9, v0
-; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i32.i32(
@@ -1327,8 +1264,7 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vim_nxv4i1_nxv4i32_i32(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv4i1_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmadc.vim v10, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i32.i32(
@@ -1344,8 +1280,7 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vim_nxv8i1_nxv8i32_i32(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv8i1_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmadc.vim v12, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i32.i32(
@@ -1361,8 +1296,7 @@ define <vscale x 16 x i1> @intrinsic_vmadc.carry.in_vim_nxv16i1_nxv16i32_i32(<vs
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv16i1_nxv16i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmadc.vim v16, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmadc.carry.in.nxv16i32.i32(
@@ -1378,8 +1312,7 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vim_nxv1i1_nxv1i64_i64(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv1i1_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vmadc.vim v9, v8, 9, v0
-; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i64.i64(
@@ -1395,8 +1328,7 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vim_nxv2i1_nxv2i64_i64(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv2i1_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmadc.vim v10, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i64.i64(
@@ -1412,8 +1344,7 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vim_nxv4i1_nxv4i64_i64(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv4i1_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmadc.vim v12, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i64.i64(
@@ -1429,8 +1360,7 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vim_nxv8i1_nxv8i64_i64(<vscal
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vim_nxv8i1_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmadc.vim v16, v8, 9, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmadc.vim v0, v8, 9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll
index 9ce7d68ba4012..f0c6911fa4099 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll
@@ -14,8 +14,7 @@ define <vscale x 1 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv1i1_nxv1i8_nxv1i8(<vs
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv1i1_nxv1i8_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsbc.borrow.in.nxv1i8.nxv1i8(
@@ -37,8 +36,7 @@ define <vscale x 2 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv2i1_nxv2i8_nxv2i8(<vs
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv2i1_nxv2i8_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsbc.borrow.in.nxv2i8.nxv2i8(
@@ -60,8 +58,7 @@ define <vscale x 4 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv4i1_nxv4i8_nxv4i8(<vs
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv4i1_nxv4i8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsbc.borrow.in.nxv4i8.nxv4i8(
@@ -83,8 +80,7 @@ define <vscale x 8 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv8i1_nxv8i8_nxv8i8(<vs
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv8i1_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsbc.borrow.in.nxv8i8.nxv8i8(
@@ -106,8 +102,7 @@ define <vscale x 16 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv16i1_nxv16i8_nxv16i8
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv16i1_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v12, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v10, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsbc.borrow.in.nxv16i8.nxv16i8(
@@ -129,8 +124,7 @@ define <vscale x 32 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv32i1_nxv32i8_nxv32i8
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv32i1_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v16, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v12, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsbc.borrow.in.nxv32i8.nxv32i8(
@@ -152,8 +146,7 @@ define <vscale x 64 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv64i1_nxv64i8_nxv64i8
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv64i1_nxv64i8_nxv64i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v16, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmsbc.borrow.in.nxv64i8.nxv64i8(
@@ -175,8 +168,7 @@ define <vscale x 1 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv1i1_nxv1i16_nxv1i16(<
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv1i1_nxv1i16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsbc.borrow.in.nxv1i16.nxv1i16(
@@ -198,8 +190,7 @@ define <vscale x 2 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv2i1_nxv2i16_nxv2i16(<
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv2i1_nxv2i16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsbc.borrow.in.nxv2i16.nxv2i16(
@@ -221,8 +212,7 @@ define <vscale x 4 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv4i1_nxv4i16_nxv4i16(<
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv4i1_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsbc.borrow.in.nxv4i16.nxv4i16(
@@ -244,8 +234,7 @@ define <vscale x 8 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv8i1_nxv8i16_nxv8i16(<
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv8i1_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v12, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v10, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsbc.borrow.in.nxv8i16.nxv8i16(
@@ -267,8 +256,7 @@ define <vscale x 16 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv16i1_nxv16i16_nxv16i
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv16i1_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v16, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v12, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsbc.borrow.in.nxv16i16.nxv16i16(
@@ -290,8 +278,7 @@ define <vscale x 32 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv32i1_nxv32i16_nxv32i
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv32i1_nxv32i16_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v16, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsbc.borrow.in.nxv32i16.nxv32i16(
@@ -313,8 +300,7 @@ define <vscale x 1 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv1i1_nxv1i32_nxv1i32(<
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv1i1_nxv1i32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsbc.borrow.in.nxv1i32.nxv1i32(
@@ -336,8 +322,7 @@ define <vscale x 2 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv2i1_nxv2i32_nxv2i32(<
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv2i1_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsbc.borrow.in.nxv2i32.nxv2i32(
@@ -359,8 +344,7 @@ define <vscale x 4 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv4i1_nxv4i32_nxv4i32(<
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv4i1_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v12, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v10, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsbc.borrow.in.nxv4i32.nxv4i32(
@@ -382,8 +366,7 @@ define <vscale x 8 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv8i1_nxv8i32_nxv8i32(<
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv8i1_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v16, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v12, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsbc.borrow.in.nxv8i32.nxv8i32(
@@ -405,8 +388,7 @@ define <vscale x 16 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv16i1_nxv16i32_nxv16i
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv16i1_nxv16i32_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v16, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsbc.borrow.in.nxv16i32.nxv16i32(
@@ -428,8 +410,7 @@ define <vscale x 1 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv1i1_nxv1i64_nxv1i64(<
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv1i1_nxv1i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v10, v8, v9, v0
-; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v9, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsbc.borrow.in.nxv1i64.nxv1i64(
@@ -451,8 +432,7 @@ define <vscale x 2 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv2i1_nxv2i64_nxv2i64(<
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv2i1_nxv2i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v12, v8, v10, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v10, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsbc.borrow.in.nxv2i64.nxv2i64(
@@ -474,8 +454,7 @@ define <vscale x 4 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv4i1_nxv4i64_nxv4i64(<
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv4i1_nxv4i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v16, v8, v12, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v12, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsbc.borrow.in.nxv4i64.nxv4i64(
@@ -497,8 +476,7 @@ define <vscale x 8 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv8i1_nxv8i64_nxv8i64(<
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv8i1_nxv8i64_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmsbc.vvm v24, v8, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmsbc.vvm v0, v8, v16, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsbc.borrow.in.nxv8i64.nxv8i64(
@@ -520,8 +498,7 @@ define <vscale x 1 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsbc.borrow.in.nxv1i8.i8(
@@ -543,8 +520,7 @@ define <vscale x 2 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsbc.borrow.in.nxv2i8.i8(
@@ -566,8 +542,7 @@ define <vscale x 4 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsbc.borrow.in.nxv4i8.i8(
@@ -589,8 +564,7 @@ define <vscale x 8 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsbc.borrow.in.nxv8i8.i8(
@@ -612,8 +586,7 @@ define <vscale x 16 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv16i1_nxv16i8_i8(<vsc
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv16i1_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v10, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsbc.borrow.in.nxv16i8.i8(
@@ -635,8 +608,7 @@ define <vscale x 32 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv32i1_nxv32i8_i8(<vsc
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv32i1_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v12, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsbc.borrow.in.nxv32i8.i8(
@@ -658,8 +630,7 @@ define <vscale x 64 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv64i1_nxv64i8_i8(<vsc
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv64i1_nxv64i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v16, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmsbc.borrow.in.nxv64i8.i8(
@@ -681,8 +652,7 @@ define <vscale x 1 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i16_i16(<vsca
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsbc.borrow.in.nxv1i16.i16(
@@ -704,8 +674,7 @@ define <vscale x 2 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i16_i16(<vsca
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsbc.borrow.in.nxv2i16.i16(
@@ -727,8 +696,7 @@ define <vscale x 4 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i16_i16(<vsca
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsbc.borrow.in.nxv4i16.i16(
@@ -750,8 +718,7 @@ define <vscale x 8 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i16_i16(<vsca
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v10, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsbc.borrow.in.nxv8i16.i16(
@@ -773,8 +740,7 @@ define <vscale x 16 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv16i1_nxv16i16_i16(<v
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv16i1_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v12, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsbc.borrow.in.nxv16i16.i16(
@@ -796,8 +762,7 @@ define <vscale x 32 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv32i1_nxv32i16_i16(<v
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv32i1_nxv32i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v16, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsbc.borrow.in.nxv32i16.i16(
@@ -819,8 +784,7 @@ define <vscale x 1 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i32_i32(<vsca
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsbc.borrow.in.nxv1i32.i32(
@@ -842,8 +806,7 @@ define <vscale x 2 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i32_i32(<vsca
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v9, v8, a0, v0
-; CHECK-NEXT:    vmv.v.v v0, v9
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsbc.borrow.in.nxv2i32.i32(
@@ -865,8 +828,7 @@ define <vscale x 4 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i32_i32(<vsca
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v10, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsbc.borrow.in.nxv4i32.i32(
@@ -888,8 +850,7 @@ define <vscale x 8 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i32_i32(<vsca
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v12, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsbc.borrow.in.nxv8i32.i32(
@@ -911,8 +872,7 @@ define <vscale x 16 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv16i1_nxv16i32_i32(<v
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv16i1_nxv16i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmsbc.vxm v16, v8, a0, v0
-; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsbc.borrow.in.nxv16i32.i32(
@@ -938,17 +898,15 @@ define <vscale x 1 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i64_i64(<vsca
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vmsbc.vvm v9, v8, v10, v0
-; RV32-NEXT:    vmv.v.v v0, v9
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vmsbc.vvm v0, v8, v9, v0
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; RV64-NEXT:    vmsbc.vxm v9, v8, a0, v0
-; RV64-NEXT:    vmv.v.v v0, v9
+; RV64-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsbc.borrow.in.nxv1i64.i64(
@@ -974,17 +932,15 @@ define <vscale x 2 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i64_i64(<vsca
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmsbc.vvm v10, v8, v12, v0
-; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vmsbc.vvm v0, v8, v10, v0
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; RV64-NEXT:    vmsbc.vxm v10, v8, a0, v0
-; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsbc.borrow.in.nxv2i64.i64(
@@ -1010,17 +966,15 @@ define <vscale x 4 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i64_i64(<vsca
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmsbc.vvm v12, v8, v16, v0
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vmsbc.vvm v0, v8, v12, v0
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsbc.vxm v12, v8, a0, v0
-; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsbc.borrow.in.nxv4i64.i64(
@@ -1046,17 +1000,15 @@ define <vscale x 8 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i64_i64(<vsca
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
-; RV32-NEXT:    vmsbc.vvm v16, v8, v24, v0
-; RV32-NEXT:    vmv1r.v v0, v16
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vmsbc.vvm v0, v8, v16, v0
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vmsbc.vxm v16, v8, a0, v0
-; RV64-NEXT:    vmv1r.v v0, v16
+; RV64-NEXT:    vmsbc.vxm v0, v8, a0, v0
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsbc.borrow.in.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
index 45e3840f7e673..57fdf24c8965d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
@@ -1162,8 +1162,8 @@ define <vscale x 16 x i1> @intrinsic_vmsge_vx_nxv16i8_i8(<vscale x 16 x i8> %0,
 ; CHECK-LABEL: intrinsic_vmsge_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmslt.vx v8, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8.i8(
@@ -1210,8 +1210,8 @@ define <vscale x 32 x i1> @intrinsic_vmsge_vx_nxv32i8_i8(<vscale x 32 x i8> %0,
 ; CHECK-LABEL: intrinsic_vmsge_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmslt.vx v8, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8.i8(
@@ -1402,8 +1402,8 @@ define <vscale x 8 x i1> @intrinsic_vmsge_vx_nxv8i16_i16(<vscale x 8 x i16> %0,
 ; CHECK-LABEL: intrinsic_vmsge_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmslt.vx v8, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16.i16(
@@ -1450,8 +1450,8 @@ define <vscale x 16 x i1> @intrinsic_vmsge_vx_nxv16i16_i16(<vscale x 16 x i16> %
 ; CHECK-LABEL: intrinsic_vmsge_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmslt.vx v8, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16.i16(
@@ -1594,8 +1594,8 @@ define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i32_i32(<vscale x 4 x i32> %0,
 ; CHECK-LABEL: intrinsic_vmsge_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmslt.vx v8, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32.i32(
@@ -1642,8 +1642,8 @@ define <vscale x 8 x i1> @intrinsic_vmsge_vx_nxv8i32_i32(<vscale x 8 x i32> %0,
 ; CHECK-LABEL: intrinsic_vmsge_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmslt.vx v8, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32.i32(
@@ -1777,8 +1777,8 @@ define <vscale x 2 x i1> @intrinsic_vmsge_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
 ; RV64-LABEL: intrinsic_vmsge_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; RV64-NEXT:    vmslt.vx v10, v8, a0
-; RV64-NEXT:    vmnot.m v0, v10
+; RV64-NEXT:    vmslt.vx v8, v8, a0
+; RV64-NEXT:    vmnot.m v0, v8
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64.i64(
@@ -1852,8 +1852,8 @@ define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
 ; RV64-LABEL: intrinsic_vmsge_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmslt.vx v12, v8, a0
-; RV64-NEXT:    vmnot.m v0, v12
+; RV64-NEXT:    vmslt.vx v8, v8, a0
+; RV64-NEXT:    vmnot.m v0, v8
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64.i64(
@@ -2644,8 +2644,8 @@ define <vscale x 16 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv16i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v10
+; CHECK-NEXT:    vmslt.vx v8, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8(
@@ -2662,8 +2662,8 @@ define <vscale x 32 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv32i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v12
+; CHECK-NEXT:    vmslt.vx v8, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8(
@@ -2734,8 +2734,8 @@ define <vscale x 8 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv8i16_i16(<vscale
 ; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v10
+; CHECK-NEXT:    vmslt.vx v8, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16(
@@ -2752,8 +2752,8 @@ define <vscale x 16 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv16i16_i16(<vscal
 ; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v12
+; CHECK-NEXT:    vmslt.vx v8, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16(
@@ -2806,8 +2806,8 @@ define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i32_i32(<vscale
 ; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmslt.vx v10, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v10
+; CHECK-NEXT:    vmslt.vx v8, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32(
@@ -2824,8 +2824,8 @@ define <vscale x 8 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv8i32_i32(<vscale
 ; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmslt.vx v12, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v12
+; CHECK-NEXT:    vmslt.vx v8, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32(
@@ -2876,18 +2876,16 @@ define <vscale x 2 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64(<vscale
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v10, v0
-; RV32-NEXT:    vmsle.vv v10, v12, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vmsle.vv v0, v10, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; RV64-NEXT:    vmslt.vx v10, v8, a0
-; RV64-NEXT:    vmandn.mm v0, v0, v10
+; RV64-NEXT:    vmslt.vx v8, v8, a0
+; RV64-NEXT:    vmandn.mm v0, v0, v8
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64.i64(
@@ -2908,18 +2906,16 @@ define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64(<vscale
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v12, v0
-; RV32-NEXT:    vmsle.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vmsle.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmslt.vx v12, v8, a0
-; RV64-NEXT:    vmandn.mm v0, v0, v12
+; RV64-NEXT:    vmslt.vx v8, v8, a0
+; RV64-NEXT:    vmandn.mm v0, v0, v8
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
index d3f57d58c7ab7..392b9d308acf7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
@@ -1123,8 +1123,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_vx_nxv16i8_i8(<vscale x 16 x i8> %0,
 ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmsltu.vx v8, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i8.i8(
@@ -1171,8 +1171,8 @@ define <vscale x 32 x i1> @intrinsic_vmsgeu_vx_nxv32i8_i8(<vscale x 32 x i8> %0,
 ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmsltu.vx v8, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.nxv32i8.i8(
@@ -1363,8 +1363,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_vx_nxv8i16_i16(<vscale x 8 x i16> %0,
 ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmsltu.vx v8, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i16.i16(
@@ -1411,8 +1411,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_vx_nxv16i16_i16(<vscale x 16 x i16>
 ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmsltu.vx v8, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i16.i16(
@@ -1555,8 +1555,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_vx_nxv4i32_i32(<vscale x 4 x i32> %0,
 ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v10
+; CHECK-NEXT:    vmsltu.vx v8, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i32.i32(
@@ -1603,8 +1603,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_vx_nxv8i32_i32(<vscale x 8 x i32> %0,
 ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0
-; CHECK-NEXT:    vmnot.m v0, v12
+; CHECK-NEXT:    vmsltu.vx v8, v8, a0
+; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i32.i32(
@@ -1738,8 +1738,8 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
 ; RV64-LABEL: intrinsic_vmsgeu_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; RV64-NEXT:    vmsltu.vx v10, v8, a0
-; RV64-NEXT:    vmnot.m v0, v10
+; RV64-NEXT:    vmsltu.vx v8, v8, a0
+; RV64-NEXT:    vmnot.m v0, v8
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.nxv2i64.i64(
@@ -1813,8 +1813,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
 ; RV64-LABEL: intrinsic_vmsgeu_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsltu.vx v12, v8, a0
-; RV64-NEXT:    vmnot.m v0, v12
+; RV64-NEXT:    vmsltu.vx v8, v8, a0
+; RV64-NEXT:    vmnot.m v0, v8
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i64.i64(
@@ -2623,8 +2623,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v10
+; CHECK-NEXT:    vmsltu.vx v8, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8(
@@ -2641,8 +2641,8 @@ define <vscale x 32 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv32i8_i8(<vscale
 ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v12
+; CHECK-NEXT:    vmsltu.vx v8, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8(
@@ -2713,8 +2713,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i16_i16(<vscale
 ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v10
+; CHECK-NEXT:    vmsltu.vx v8, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16(
@@ -2731,8 +2731,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i16_i16(<vsca
 ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v12
+; CHECK-NEXT:    vmsltu.vx v8, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16(
@@ -2785,8 +2785,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i32_i32(<vscale
 ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmsltu.vx v10, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v10
+; CHECK-NEXT:    vmsltu.vx v8, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32(
@@ -2803,8 +2803,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i32_i32(<vscale
 ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmsltu.vx v12, v8, a0
-; CHECK-NEXT:    vmandn.mm v0, v0, v12
+; CHECK-NEXT:    vmsltu.vx v8, v8, a0
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32(
@@ -2855,18 +2855,16 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64(<vscale
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmv1r.v v10, v0
-; RV32-NEXT:    vmsleu.vv v10, v12, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vmsleu.vv v0, v10, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; RV64-NEXT:    vmsltu.vx v10, v8, a0
-; RV64-NEXT:    vmandn.mm v0, v0, v10
+; RV64-NEXT:    vmsltu.vx v8, v8, a0
+; RV64-NEXT:    vmandn.mm v0, v0, v8
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64.i64(
@@ -2887,18 +2885,16 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64(<vscale
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmv1r.v v12, v0
-; RV32-NEXT:    vmsleu.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vmsleu.vv v0, v12, v8, v0.t
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT:    vmsltu.vx v12, v8, a0
-; RV64-NEXT:    vmandn.mm v0, v0, v12
+; RV64-NEXT:    vmsltu.vx v8, v8, a0
+; RV64-NEXT:    vmandn.mm v0, v0, v8
 ; RV64-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64.i64(
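
A minimal sketch of the overlap rule the updated checks above rely on (not
part of the patch): the vmslt/vmsltu/vmsle/vmsleu destination is a mask with
EEW=1, narrower than its e8-e64 source, and the RVV spec (v1.0, section 5.2)
only allows such a destination to overlap a source register group in the
group's lowest-numbered register. Assuming an m4 source group in v8-v11:

  vsetvli zero, a1, e64, m4, ta, ma
  vmsltu.vx v8, v8, a0    # mask dest v8 is the lowest register of the v8-v11
                          # source group, so the overlap is legal and no
                          # scratch register or copy is needed
  vmsltu.vx v9, v8, a0    # dest v9 overlaps the middle of the source group,
                          # which remains illegal
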
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclip.ll b/llvm/test/CodeGen/RISCV/rvv/vnclip.ll
index 8902b1a28f8c1..ad13560a8db90 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnclip.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnclip.ll
@@ -159,8 +159,7 @@ define <vscale x 8 x i8> @intrinsic_vnclip_wv_nxv8i8_nxv8i16_nxv8i8(<vscale x 8
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vnclip.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnclip.wv v8, v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vnclip.nxv8i8.nxv8i16.nxv8i8(
@@ -208,8 +207,7 @@ define <vscale x 16 x i8> @intrinsic_vnclip_wv_nxv16i8_nxv16i16_nxv16i8(<vscale
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vnclip.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnclip.wv v8, v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vnclip.nxv16i8.nxv16i16.nxv16i8(
@@ -257,8 +255,7 @@ define <vscale x 32 x i8> @intrinsic_vnclip_wv_nxv32i8_nxv32i16_nxv32i8(<vscale
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vnclip.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnclip.wv v8, v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vnclip.nxv32i8.nxv32i16.nxv32i8(
@@ -402,8 +399,7 @@ define <vscale x 4 x i16> @intrinsic_vnclip_wv_nxv4i16_nxv4i32_nxv4i16(<vscale x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnclip.wv v8, v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vnclip.nxv4i16.nxv4i32.nxv4i16(
@@ -451,8 +447,7 @@ define <vscale x 8 x i16> @intrinsic_vnclip_wv_nxv8i16_nxv8i32_nxv8i16(<vscale x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vnclip.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnclip.wv v8, v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vnclip.nxv8i16.nxv8i32.nxv8i16(
@@ -500,8 +495,7 @@ define <vscale x 16 x i16> @intrinsic_vnclip_wv_nxv16i16_nxv16i32_nxv16i16(<vsca
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vnclip.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnclip.wv v8, v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vnclip.nxv16i16.nxv16i32.nxv16i16(
@@ -597,8 +591,7 @@ define <vscale x 2 x i32> @intrinsic_vnclip_wv_nxv2i32_nxv2i64_nxv2i32(<vscale x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnclip.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnclip.wv v8, v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vnclip.nxv2i32.nxv2i64.nxv2i32(
@@ -646,8 +639,7 @@ define <vscale x 4 x i32> @intrinsic_vnclip_wv_nxv4i32_nxv4i64_nxv4i32(<vscale x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vnclip.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnclip.wv v8, v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vnclip.nxv4i32.nxv4i64.nxv4i32(
@@ -695,8 +687,7 @@ define <vscale x 8 x i32> @intrinsic_vnclip_wv_nxv8i32_nxv8i64_nxv8i32(<vscale x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vnclip.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnclip.wv v8, v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vnclip.nxv8i32.nxv8i64.nxv8i32(
@@ -884,8 +875,7 @@ define <vscale x 8 x i8> @intrinsic_vnclip_vx_nxv8i8_nxv8i16(<vscale x 8 x i16>
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vnclip.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vnclip.nxv8i8.nxv8i16(
@@ -932,8 +922,7 @@ define <vscale x 16 x i8> @intrinsic_vnclip_vx_nxv16i8_nxv16i16(<vscale x 16 x i
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vnclip.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vnclip.nxv16i8.nxv16i16(
@@ -980,8 +969,7 @@ define <vscale x 32 x i8> @intrinsic_vnclip_vx_nxv32i8_nxv32i16(<vscale x 32 x i
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vnclip.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vnclip.nxv32i8.nxv32i16(
@@ -1122,8 +1110,7 @@ define <vscale x 4 x i16> @intrinsic_vnclip_vx_nxv4i16_nxv4i32(<vscale x 4 x i32
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vnclip.nxv4i16.nxv4i32(
@@ -1170,8 +1157,7 @@ define <vscale x 8 x i16> @intrinsic_vnclip_vx_nxv8i16_nxv8i32(<vscale x 8 x i32
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vnclip.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vnclip.nxv8i16.nxv8i32(
@@ -1218,8 +1204,7 @@ define <vscale x 16 x i16> @intrinsic_vnclip_vx_nxv16i16_nxv16i32(<vscale x 16 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vnclip.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vnclip.nxv16i16.nxv16i32(
@@ -1313,8 +1298,7 @@ define <vscale x 2 x i32> @intrinsic_vnclip_vx_nxv2i32_nxv2i64(<vscale x 2 x i64
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vnclip.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vnclip.nxv2i32.nxv2i64(
@@ -1361,8 +1345,7 @@ define <vscale x 4 x i32> @intrinsic_vnclip_vx_nxv4i32_nxv4i64(<vscale x 4 x i64
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vnclip.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vnclip.nxv4i32.nxv4i64(
@@ -1409,8 +1392,7 @@ define <vscale x 8 x i32> @intrinsic_vnclip_vx_nxv8i32_nxv8i64(<vscale x 8 x i64
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vnclip.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vnclip.nxv8i32.nxv8i64(
@@ -1557,8 +1539,7 @@ define <vscale x 8 x i8> @intrinsic_vnclip_vi_nxv8i8_nxv8i16_i8(<vscale x 8 x i1
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v10, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vnclip.nxv8i8.nxv8i16(
@@ -1593,8 +1574,7 @@ define <vscale x 16 x i8> @intrinsic_vnclip_vi_nxv16i8_nxv16i16_i8(<vscale x 16
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vnclip.wi v12, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vnclip.nxv16i8.nxv16i16(
@@ -1629,8 +1609,7 @@ define <vscale x 32 x i8> @intrinsic_vnclip_vi_nxv32i8_nxv32i16_i8(<vscale x 32
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vnclip.wi v16, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vnclip.nxv32i8.nxv32i16(
@@ -1735,8 +1714,7 @@ define <vscale x 4 x i16> @intrinsic_vnclip_vi_nxv4i16_nxv4i32_i16(<vscale x 4 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v10, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vnclip.nxv4i16.nxv4i32(
@@ -1771,8 +1749,7 @@ define <vscale x 8 x i16> @intrinsic_vnclip_vi_nxv8i16_nxv8i32_i16(<vscale x 8 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vnclip.wi v12, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vnclip.nxv8i16.nxv8i32(
@@ -1807,8 +1784,7 @@ define <vscale x 16 x i16> @intrinsic_vnclip_vi_nxv16i16_nxv16i32_i16(<vscale x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vnclip.wi v16, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vnclip.nxv16i16.nxv16i32(
@@ -1878,8 +1854,7 @@ define <vscale x 2 x i32> @intrinsic_vnclip_vi_nxv2i32_nxv2i64_i32(<vscale x 2 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnclip.wi v10, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vnclip.nxv2i32.nxv2i64(
@@ -1914,8 +1889,7 @@ define <vscale x 4 x i32> @intrinsic_vnclip_vi_nxv4i32_nxv4i64_i32(<vscale x 4 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vnclip.wi v12, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vnclip.nxv4i32.nxv4i64(
@@ -1950,8 +1924,7 @@ define <vscale x 8 x i32> @intrinsic_vnclip_vi_nxv8i32_nxv8i64_i32(<vscale x 8 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vnclip.wi v16, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vnclip.nxv8i32.nxv8i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclipu.ll b/llvm/test/CodeGen/RISCV/rvv/vnclipu.ll
index a1804e7d98a46..8d86b597004c6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnclipu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnclipu.ll
@@ -159,8 +159,7 @@ define <vscale x 8 x i8> @intrinsic_vnclipu_wv_nxv8i8_nxv8i16_nxv8i8(<vscale x 8
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnclipu.wv v8, v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vnclipu.nxv8i8.nxv8i16.nxv8i8(
@@ -208,8 +207,7 @@ define <vscale x 16 x i8> @intrinsic_vnclipu_wv_nxv16i8_nxv16i16_nxv16i8(<vscale
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnclipu.wv v8, v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vnclipu.nxv16i8.nxv16i16.nxv16i8(
@@ -257,8 +255,7 @@ define <vscale x 32 x i8> @intrinsic_vnclipu_wv_nxv32i8_nxv32i16_nxv32i8(<vscale
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vnclipu.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnclipu.wv v8, v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vnclipu.nxv32i8.nxv32i16.nxv32i8(
@@ -402,8 +399,7 @@ define <vscale x 4 x i16> @intrinsic_vnclipu_wv_nxv4i16_nxv4i32_nxv4i16(<vscale
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnclipu.wv v8, v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vnclipu.nxv4i16.nxv4i32.nxv4i16(
@@ -451,8 +447,7 @@ define <vscale x 8 x i16> @intrinsic_vnclipu_wv_nxv8i16_nxv8i32_nxv8i16(<vscale
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnclipu.wv v8, v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vnclipu.nxv8i16.nxv8i32.nxv8i16(
@@ -500,8 +495,7 @@ define <vscale x 16 x i16> @intrinsic_vnclipu_wv_nxv16i16_nxv16i32_nxv16i16(<vsc
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vnclipu.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnclipu.wv v8, v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vnclipu.nxv16i16.nxv16i32.nxv16i16(
@@ -597,8 +591,7 @@ define <vscale x 2 x i32> @intrinsic_vnclipu_wv_nxv2i32_nxv2i64_nxv2i32(<vscale
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnclipu.wv v8, v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vnclipu.nxv2i32.nxv2i64.nxv2i32(
@@ -646,8 +639,7 @@ define <vscale x 4 x i32> @intrinsic_vnclipu_wv_nxv4i32_nxv4i64_nxv4i32(<vscale
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnclipu.wv v8, v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vnclipu.nxv4i32.nxv4i64.nxv4i32(
@@ -695,8 +687,7 @@ define <vscale x 8 x i32> @intrinsic_vnclipu_wv_nxv8i32_nxv8i64_nxv8i32(<vscale
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vnclipu.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnclipu.wv v8, v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vnclipu.nxv8i32.nxv8i64.nxv8i32(
@@ -884,8 +875,7 @@ define <vscale x 8 x i8> @intrinsic_vnclipu_vx_nxv8i8_nxv8i16(<vscale x 8 x i16>
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vnclipu.nxv8i8.nxv8i16(
@@ -932,8 +922,7 @@ define <vscale x 16 x i8> @intrinsic_vnclipu_vx_nxv16i8_nxv16i16(<vscale x 16 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vnclipu.nxv16i8.nxv16i16(
@@ -980,8 +969,7 @@ define <vscale x 32 x i8> @intrinsic_vnclipu_vx_nxv32i8_nxv32i16(<vscale x 32 x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vnclipu.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vnclipu.nxv32i8.nxv32i16(
@@ -1122,8 +1110,7 @@ define <vscale x 4 x i16> @intrinsic_vnclipu_vx_nxv4i16_nxv4i32(<vscale x 4 x i3
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vnclipu.nxv4i16.nxv4i32(
@@ -1170,8 +1157,7 @@ define <vscale x 8 x i16> @intrinsic_vnclipu_vx_nxv8i16_nxv8i32(<vscale x 8 x i3
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vnclipu.nxv8i16.nxv8i32(
@@ -1218,8 +1204,7 @@ define <vscale x 16 x i16> @intrinsic_vnclipu_vx_nxv16i16_nxv16i32(<vscale x 16
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vnclipu.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vnclipu.nxv16i16.nxv16i32(
@@ -1313,8 +1298,7 @@ define <vscale x 2 x i32> @intrinsic_vnclipu_vx_nxv2i32_nxv2i64(<vscale x 2 x i6
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vnclipu.nxv2i32.nxv2i64(
@@ -1361,8 +1345,7 @@ define <vscale x 4 x i32> @intrinsic_vnclipu_vx_nxv4i32_nxv4i64(<vscale x 4 x i6
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vnclipu.nxv4i32.nxv4i64(
@@ -1409,8 +1392,7 @@ define <vscale x 8 x i32> @intrinsic_vnclipu_vx_nxv8i32_nxv8i64(<vscale x 8 x i6
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vnclipu.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vnclipu.nxv8i32.nxv8i64(
@@ -1557,8 +1539,7 @@ define <vscale x 8 x i8> @intrinsic_vnclipu_vi_nxv8i8_nxv8i16_i8(<vscale x 8 x i
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vnclipu.nxv8i8.nxv8i16(
@@ -1593,8 +1574,7 @@ define <vscale x 16 x i8> @intrinsic_vnclipu_vi_nxv16i8_nxv16i16_i8(<vscale x 16
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vnclipu.nxv16i8.nxv16i16(
@@ -1629,8 +1609,7 @@ define <vscale x 32 x i8> @intrinsic_vnclipu_vi_nxv32i8_nxv32i16_i8(<vscale x 32
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vnclipu.wi v16, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vnclipu.nxv32i8.nxv32i16(
@@ -1735,8 +1714,7 @@ define <vscale x 4 x i16> @intrinsic_vnclipu_vi_nxv4i16_nxv4i32_i16(<vscale x 4
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vnclipu.nxv4i16.nxv4i32(
@@ -1771,8 +1749,7 @@ define <vscale x 8 x i16> @intrinsic_vnclipu_vi_nxv8i16_nxv8i32_i16(<vscale x 8
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vnclipu.nxv8i16.nxv8i32(
@@ -1807,8 +1784,7 @@ define <vscale x 16 x i16> @intrinsic_vnclipu_vi_nxv16i16_nxv16i32_i16(<vscale x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vnclipu.wi v16, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vnclipu.nxv16i16.nxv16i32(
@@ -1878,8 +1854,7 @@ define <vscale x 2 x i32> @intrinsic_vnclipu_vi_nxv2i32_nxv2i64_i32(<vscale x 2
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v10, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vnclipu.nxv2i32.nxv2i64(
@@ -1914,8 +1889,7 @@ define <vscale x 4 x i32> @intrinsic_vnclipu_vi_nxv4i32_nxv4i64_i32(<vscale x 4
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v12, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vnclipu.nxv4i32.nxv4i64(
@@ -1950,8 +1924,7 @@ define <vscale x 8 x i32> @intrinsic_vnclipu_vi_nxv8i32_nxv8i64_i32(<vscale x 8
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vnclipu.wi v16, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vnclipu.nxv8i32.nxv8i64(
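
The vnclip/vnclipu diffs above use the narrowing form of the same rule (again
a sketch, not part of the patch): the destination EEW is half the source EEW,
so the destination may overlap only the lowest-numbered part of the source
group. For an e8/m1 result narrowing an e16/m2 source held in v8-v9:

  vsetvli zero, a0, e8, m1, ta, ma
  vnclip.wv v8, v8, v10   # dest v8 overlaps the low half of the e16 source
                          # group v8-v9: legal, so the trailing vmv.v.v
                          # copy in the old checks disappears
  vnclip.wv v9, v8, v10   # dest v9 would overlap the high half: illegal
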
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
index 7b97ca3c0090e..def0f68648152 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
@@ -21,8 +21,7 @@ define <vscale x 2 x i32> @vnsra_wx_i64_nxv2i32(<vscale x 2 x i64> %va, i64 %b)
 ; CHECK-LABEL: vnsra_wx_i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsra.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
@@ -35,8 +34,7 @@ define <vscale x 4 x i32> @vnsra_wx_i64_nxv4i32(<vscale x 4 x i64> %va, i64 %b)
 ; CHECK-LABEL: vnsra_wx_i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsra.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -49,8 +47,7 @@ define <vscale x 8 x i32> @vnsra_wx_i64_nxv8i32(<vscale x 8 x i64> %va, i64 %b)
 ; CHECK-LABEL: vnsra_wx_i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsra.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -129,8 +126,7 @@ define <vscale x 2 x i32> @vnsra_wv_nxv2i32_sext(<vscale x 2 x i64> %va, <vscale
 ; CHECK-LABEL: vnsra_wv_nxv2i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsra.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnsra.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
   %x = ashr <vscale x 2 x i64> %va, %vc
@@ -142,8 +138,7 @@ define <vscale x 2 x i32> @vnsra_wx_i32_nxv2i32_sext(<vscale x 2 x i64> %va, i32
 ; CHECK-LABEL: vnsra_wx_i32_nxv2i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsra.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -157,8 +152,7 @@ define <vscale x 2 x i32> @vnsra_wx_i16_nxv2i32_sext(<vscale x 2 x i64> %va, i16
 ; CHECK-LABEL: vnsra_wx_i16_nxv2i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsra.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -172,8 +166,7 @@ define <vscale x 2 x i32> @vnsra_wx_i8_nxv2i32_sext(<vscale x 2 x i64> %va, i8 %
 ; CHECK-LABEL: vnsra_wx_i8_nxv2i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsra.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -187,8 +180,7 @@ define <vscale x 2 x i32> @vnsra_wi_i32_nxv2i32_sext(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vnsra_wi_i32_nxv2i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 15
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
   %vb = sext <vscale x 2 x i32> splat (i32 15) to <vscale x 2 x i64>
   %x = ashr <vscale x 2 x i64> %va, %vb
@@ -200,8 +192,7 @@ define <vscale x 4 x i32> @vnsra_wv_nxv4i32_sext(<vscale x 4 x i64> %va, <vscale
 ; CHECK-LABEL: vnsra_wv_nxv4i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsra.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnsra.wv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
   %x = ashr <vscale x 4 x i64> %va, %vc
@@ -213,8 +204,7 @@ define <vscale x 4 x i32> @vnsra_wx_i32_nxv4i32_sext(<vscale x 4 x i64> %va, i32
 ; CHECK-LABEL: vnsra_wx_i32_nxv4i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsra.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -228,8 +218,7 @@ define <vscale x 4 x i32> @vnsra_wx_i16_nxv4i32_sext(<vscale x 4 x i64> %va, i16
 ; CHECK-LABEL: vnsra_wx_i16_nxv4i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsra.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -243,8 +232,7 @@ define <vscale x 4 x i32> @vnsra_wx_i8_nxv4i32_sext(<vscale x 4 x i64> %va, i8 %
 ; CHECK-LABEL: vnsra_wx_i8_nxv4i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsra.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -258,8 +246,7 @@ define <vscale x 4 x i32> @vnsra_wi_i32_nxv4i32_sext(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vnsra_wi_i32_nxv4i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 15
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
   %vb = sext <vscale x 4 x i32> splat (i32 15) to <vscale x 4 x i64>
   %x = ashr <vscale x 4 x i64> %va, %vb
@@ -271,8 +258,7 @@ define <vscale x 8 x i32> @vnsra_wv_nxv8i32_sext(<vscale x 8 x i64> %va, <vscale
 ; CHECK-LABEL: vnsra_wv_nxv8i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsra.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnsra.wv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
   %x = ashr <vscale x 8 x i64> %va, %vc
@@ -284,8 +270,7 @@ define <vscale x 8 x i32> @vnsra_wx_i32_nxv8i32_sext(<vscale x 8 x i64> %va, i32
 ; CHECK-LABEL: vnsra_wx_i32_nxv8i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsra.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -299,8 +284,7 @@ define <vscale x 8 x i32> @vnsra_wx_i16_nxv8i32_sext(<vscale x 8 x i64> %va, i16
 ; CHECK-LABEL: vnsra_wx_i16_nxv8i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsra.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -314,8 +298,7 @@ define <vscale x 8 x i32> @vnsra_wx_i8_nxv8i32_sext(<vscale x 8 x i64> %va, i8 %
 ; CHECK-LABEL: vnsra_wx_i8_nxv8i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsra.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -329,8 +312,7 @@ define <vscale x 8 x i32> @vnsra_wi_i32_nxv8i32_sext(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vnsra_wi_i32_nxv8i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 15
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
   %vb = sext <vscale x 8 x i32> splat (i32 15) to <vscale x 8 x i64>
   %x = ashr <vscale x 8 x i64> %va, %vb
@@ -408,8 +390,7 @@ define <vscale x 2 x i32> @vnsra_wv_nxv2i32_zext(<vscale x 2 x i64> %va, <vscale
 ; CHECK-LABEL: vnsra_wv_nxv2i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsra.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnsra.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
   %x = ashr <vscale x 2 x i64> %va, %vc
@@ -421,8 +402,7 @@ define <vscale x 2 x i32> @vnsra_wx_i32_nxv2i32_zext(<vscale x 2 x i64> %va, i32
 ; CHECK-LABEL: vnsra_wx_i32_nxv2i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsra.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -436,8 +416,7 @@ define <vscale x 2 x i32> @vnsra_wx_i16_nxv2i32_zext(<vscale x 2 x i64> %va, i16
 ; CHECK-LABEL: vnsra_wx_i16_nxv2i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsra.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -451,8 +430,7 @@ define <vscale x 2 x i32> @vnsra_wx_i8_nxv2i32_zext(<vscale x 2 x i64> %va, i8 %
 ; CHECK-LABEL: vnsra_wx_i8_nxv2i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsra.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -466,8 +444,7 @@ define <vscale x 2 x i32> @vnsra_wi_i32_nxv2i32_zext(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vnsra_wi_i32_nxv2i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 15
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
   %vb = zext <vscale x 2 x i32> splat (i32 15) to <vscale x 2 x i64>
   %x = ashr <vscale x 2 x i64> %va, %vb
@@ -479,8 +456,7 @@ define <vscale x 4 x i32> @vnsra_wv_nxv4i32_zext(<vscale x 4 x i64> %va, <vscale
 ; CHECK-LABEL: vnsra_wv_nxv4i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsra.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnsra.wv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
   %x = ashr <vscale x 4 x i64> %va, %vc
@@ -492,8 +468,7 @@ define <vscale x 4 x i32> @vnsra_wx_i32_nxv4i32_zext(<vscale x 4 x i64> %va, i32
 ; CHECK-LABEL: vnsra_wx_i32_nxv4i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsra.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -507,8 +482,7 @@ define <vscale x 4 x i32> @vnsra_wx_i16_nxv4i32_zext(<vscale x 4 x i64> %va, i16
 ; CHECK-LABEL: vnsra_wx_i16_nxv4i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsra.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -522,8 +496,7 @@ define <vscale x 4 x i32> @vnsra_wx_i8_nxv4i32_zext(<vscale x 4 x i64> %va, i8 %
 ; CHECK-LABEL: vnsra_wx_i8_nxv4i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsra.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -537,8 +510,7 @@ define <vscale x 4 x i32> @vnsra_wi_i32_nxv4i32_zext(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vnsra_wi_i32_nxv4i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 15
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
   %vb = zext <vscale x 4 x i32> splat (i32 15) to <vscale x 4 x i64>
   %x = ashr <vscale x 4 x i64> %va, %vb
@@ -550,8 +522,7 @@ define <vscale x 8 x i32> @vnsra_wv_nxv8i32_zext(<vscale x 8 x i64> %va, <vscale
 ; CHECK-LABEL: vnsra_wv_nxv8i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsra.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnsra.wv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
   %x = ashr <vscale x 8 x i64> %va, %vc
@@ -563,8 +534,7 @@ define <vscale x 8 x i32> @vnsra_wx_i32_nxv8i32_zext(<vscale x 8 x i64> %va, i32
 ; CHECK-LABEL: vnsra_wx_i32_nxv8i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsra.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -578,8 +548,7 @@ define <vscale x 8 x i32> @vnsra_wx_i16_nxv8i32_zext(<vscale x 8 x i64> %va, i16
 ; CHECK-LABEL: vnsra_wx_i16_nxv8i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsra.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -593,8 +562,7 @@ define <vscale x 8 x i32> @vnsra_wx_i8_nxv8i32_zext(<vscale x 8 x i64> %va, i8 %
 ; CHECK-LABEL: vnsra_wx_i8_nxv8i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsra.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -608,8 +576,7 @@ define <vscale x 8 x i32> @vnsra_wi_i32_nxv8i32_zext(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vnsra_wi_i32_nxv8i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 15
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
   %vb = zext <vscale x 8 x i32> splat (i32 15) to <vscale x 8 x i64>
   %x = ashr <vscale x 8 x i64> %va, %vb
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra.ll
index ed4ec10d89469..0bda8bb672522 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsra.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsra.ll
@@ -155,8 +155,7 @@ define <vscale x 8 x i8> @intrinsic_vnsra_wv_nxv8i8_nxv8i16_nxv8i8(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv8i8_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vnsra.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnsra.wv v8, v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vnsra.nxv8i8.nxv8i16.nxv8i8(
@@ -203,8 +202,7 @@ define <vscale x 16 x i8> @intrinsic_vnsra_wv_nxv16i8_nxv16i16_nxv16i8(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv16i8_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vnsra.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnsra.wv v8, v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vnsra.nxv16i8.nxv16i16.nxv16i8(
@@ -251,8 +249,7 @@ define <vscale x 32 x i8> @intrinsic_vnsra_wv_nxv32i8_nxv32i16_nxv32i8(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv32i8_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vnsra.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnsra.wv v8, v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vnsra.nxv32i8.nxv32i16.nxv32i8(
@@ -393,8 +390,7 @@ define <vscale x 4 x i16> @intrinsic_vnsra_wv_nxv4i16_nxv4i32_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv4i16_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vnsra.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnsra.wv v8, v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vnsra.nxv4i16.nxv4i32.nxv4i16(
@@ -441,8 +437,7 @@ define <vscale x 8 x i16> @intrinsic_vnsra_wv_nxv8i16_nxv8i32_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv8i16_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vnsra.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnsra.wv v8, v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vnsra.nxv8i16.nxv8i32.nxv8i16(
@@ -489,8 +484,7 @@ define <vscale x 16 x i16> @intrinsic_vnsra_wv_nxv16i16_nxv16i32_nxv16i16(<vscal
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv16i16_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vnsra.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnsra.wv v8, v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vnsra.nxv16i16.nxv16i32.nxv16i16(
@@ -584,8 +578,7 @@ define <vscale x 2 x i32> @intrinsic_vnsra_wv_nxv2i32_nxv2i64_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv2i32_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnsra.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnsra.wv v8, v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vnsra.nxv2i32.nxv2i64.nxv2i32(
@@ -632,8 +625,7 @@ define <vscale x 4 x i32> @intrinsic_vnsra_wv_nxv4i32_nxv4i64_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv4i32_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vnsra.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnsra.wv v8, v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vnsra.nxv4i32.nxv4i64.nxv4i32(
@@ -680,8 +672,7 @@ define <vscale x 8 x i32> @intrinsic_vnsra_wv_nxv8i32_nxv8i64_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv8i32_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vnsra.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnsra.wv v8, v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vnsra.nxv8i32.nxv8i64.nxv8i32(
@@ -869,8 +860,7 @@ define <vscale x 8 x i8> @intrinsic_vnsra_vx_nxv8i8_nxv8i16(<vscale x 8 x i16> %
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv8i8_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vnsra.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vnsra.nxv8i8.nxv8i16(
@@ -917,8 +907,7 @@ define <vscale x 16 x i8> @intrinsic_vnsra_vx_nxv16i8_nxv16i16(<vscale x 16 x i1
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vnsra.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vnsra.nxv16i8.nxv16i16(
@@ -965,8 +954,7 @@ define <vscale x 32 x i8> @intrinsic_vnsra_vx_nxv32i8_nxv32i16(<vscale x 32 x i1
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv32i8_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vnsra.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vnsra.nxv32i8.nxv32i16(
@@ -1107,8 +1095,7 @@ define <vscale x 4 x i16> @intrinsic_vnsra_vx_nxv4i16_nxv4i32(<vscale x 4 x i32>
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv4i16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vnsra.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vnsra.nxv4i16.nxv4i32(
@@ -1155,8 +1142,7 @@ define <vscale x 8 x i16> @intrinsic_vnsra_vx_nxv8i16_nxv8i32(<vscale x 8 x i32>
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vnsra.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vnsra.nxv8i16.nxv8i32(
@@ -1203,8 +1189,7 @@ define <vscale x 16 x i16> @intrinsic_vnsra_vx_nxv16i16_nxv16i32(<vscale x 16 x
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv16i16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vnsra.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vnsra.nxv16i16.nxv16i32(
@@ -1298,8 +1283,7 @@ define <vscale x 2 x i32> @intrinsic_vnsra_vx_nxv2i32_nxv2i64(<vscale x 2 x i64>
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vnsra.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vnsra.nxv2i32.nxv2i64(
@@ -1346,8 +1330,7 @@ define <vscale x 4 x i32> @intrinsic_vnsra_vx_nxv4i32_nxv4i64(<vscale x 4 x i64>
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv4i32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vnsra.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vnsra.nxv4i32.nxv4i64(
@@ -1394,8 +1377,7 @@ define <vscale x 8 x i32> @intrinsic_vnsra_vx_nxv8i32_nxv8i64(<vscale x 8 x i64>
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv8i32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vnsra.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vnsra.nxv8i32.nxv8i64(
@@ -1535,8 +1517,7 @@ define <vscale x 8 x i8> @intrinsic_vnsra_vi_nxv8i8_nxv8i16_i8(<vscale x 8 x i16
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv8i8_nxv8i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vnsra.wi v10, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vnsra.nxv8i8.nxv8i16(
@@ -1569,8 +1550,7 @@ define <vscale x 16 x i8> @intrinsic_vnsra_vi_nxv16i8_nxv16i16_i8(<vscale x 16 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv16i8_nxv16i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vnsra.wi v12, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vnsra.nxv16i8.nxv16i16(
@@ -1603,8 +1583,7 @@ define <vscale x 32 x i8> @intrinsic_vnsra_vi_nxv32i8_nxv32i16_i8(<vscale x 32 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv32i8_nxv32i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vnsra.wi v16, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vnsra.nxv32i8.nxv32i16(
@@ -1703,8 +1682,7 @@ define <vscale x 4 x i16> @intrinsic_vnsra_vi_nxv4i16_nxv4i32_i16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv4i16_nxv4i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vnsra.wi v10, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vnsra.nxv4i16.nxv4i32(
@@ -1737,8 +1715,7 @@ define <vscale x 8 x i16> @intrinsic_vnsra_vi_nxv8i16_nxv8i32_i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv8i16_nxv8i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vnsra.wi v12, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vnsra.nxv8i16.nxv8i32(
@@ -1771,8 +1748,7 @@ define <vscale x 16 x i16> @intrinsic_vnsra_vi_nxv16i16_nxv16i32_i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv16i16_nxv16i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vnsra.wi v16, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vnsra.nxv16i16.nxv16i32(
@@ -1838,8 +1814,7 @@ define <vscale x 2 x i32> @intrinsic_vnsra_vi_nxv2i32_nxv2i64_i32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv2i32_nxv2i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnsra.wi v10, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vnsra.nxv2i32.nxv2i64(
@@ -1872,8 +1847,7 @@ define <vscale x 4 x i32> @intrinsic_vnsra_vi_nxv4i32_nxv4i64_i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv4i32_nxv4i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vnsra.wi v12, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vnsra.nxv4i32.nxv4i64(
@@ -1906,8 +1880,7 @@ define <vscale x 8 x i32> @intrinsic_vnsra_vi_nxv8i32_nxv8i64_i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv8i32_nxv8i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vnsra.wi v16, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vnsra.nxv8i32.nxv8i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
index 2b912662b4b94..ba6f843b62bd1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
@@ -21,8 +21,7 @@ define <vscale x 2 x i32> @vnsrl_wx_i64_nxv2i32(<vscale x 2 x i64> %va, i64 %b)
 ; CHECK-LABEL: vnsrl_wx_i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
@@ -35,8 +34,7 @@ define <vscale x 4 x i32> @vnsrl_wx_i64_nxv4i32(<vscale x 4 x i64> %va, i64 %b)
 ; CHECK-LABEL: vnsrl_wx_i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -49,8 +47,7 @@ define <vscale x 8 x i32> @vnsrl_wx_i64_nxv8i32(<vscale x 8 x i64> %va, i64 %b)
 ; CHECK-LABEL: vnsrl_wx_i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -129,8 +126,7 @@ define <vscale x 2 x i32> @vnsrl_wv_nxv2i32_sext(<vscale x 2 x i64> %va, <vscale
 ; CHECK-LABEL: vnsrl_wv_nxv2i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnsrl.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
   %x = lshr <vscale x 2 x i64> %va, %vc
@@ -142,8 +138,7 @@ define <vscale x 2 x i32> @vnsrl_wx_i32_nxv2i32_sext(<vscale x 2 x i64> %va, i32
 ; CHECK-LABEL: vnsrl_wx_i32_nxv2i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -157,8 +152,7 @@ define <vscale x 2 x i32> @vnsrl_wx_i16_nxv2i32_sext(<vscale x 2 x i64> %va, i16
 ; CHECK-LABEL: vnsrl_wx_i16_nxv2i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -172,8 +166,7 @@ define <vscale x 2 x i32> @vnsrl_wx_i8_nxv2i32_sext(<vscale x 2 x i64> %va, i8 %
 ; CHECK-LABEL: vnsrl_wx_i8_nxv2i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -187,8 +180,7 @@ define <vscale x 2 x i32> @vnsrl_wi_i32_nxv2i32_sext(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vnsrl_wi_i32_nxv2i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 15
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
   %vb = sext <vscale x 2 x i32> splat (i32 15) to <vscale x 2 x i64>
   %x = lshr <vscale x 2 x i64> %va, %vb
@@ -200,8 +192,7 @@ define <vscale x 4 x i32> @vnsrl_wv_nxv4i32_sext(<vscale x 4 x i64> %va, <vscale
 ; CHECK-LABEL: vnsrl_wv_nxv4i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnsrl.wv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
   %x = lshr <vscale x 4 x i64> %va, %vc
@@ -213,8 +204,7 @@ define <vscale x 4 x i32> @vnsrl_wx_i32_nxv4i32_sext(<vscale x 4 x i64> %va, i32
 ; CHECK-LABEL: vnsrl_wx_i32_nxv4i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -228,8 +218,7 @@ define <vscale x 4 x i32> @vnsrl_wx_i16_nxv4i32_sext(<vscale x 4 x i64> %va, i16
 ; CHECK-LABEL: vnsrl_wx_i16_nxv4i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -243,8 +232,7 @@ define <vscale x 4 x i32> @vnsrl_wx_i8_nxv4i32_sext(<vscale x 4 x i64> %va, i8 %
 ; CHECK-LABEL: vnsrl_wx_i8_nxv4i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -258,8 +246,7 @@ define <vscale x 4 x i32> @vnsrl_wi_i32_nxv4i32_sext(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vnsrl_wi_i32_nxv4i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 15
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
   %vb = sext <vscale x 4 x i32> splat (i32 15) to <vscale x 4 x i64>
   %x = lshr <vscale x 4 x i64> %va, %vb
@@ -271,8 +258,7 @@ define <vscale x 8 x i32> @vnsrl_wv_nxv8i32_sext(<vscale x 8 x i64> %va, <vscale
 ; CHECK-LABEL: vnsrl_wv_nxv8i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnsrl.wv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
   %x = lshr <vscale x 8 x i64> %va, %vc
@@ -284,8 +270,7 @@ define <vscale x 8 x i32> @vnsrl_wx_i32_nxv8i32_sext(<vscale x 8 x i64> %va, i32
 ; CHECK-LABEL: vnsrl_wx_i32_nxv8i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -299,8 +284,7 @@ define <vscale x 8 x i32> @vnsrl_wx_i16_nxv8i32_sext(<vscale x 8 x i64> %va, i16
 ; CHECK-LABEL: vnsrl_wx_i16_nxv8i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -314,8 +298,7 @@ define <vscale x 8 x i32> @vnsrl_wx_i8_nxv8i32_sext(<vscale x 8 x i64> %va, i8 %
 ; CHECK-LABEL: vnsrl_wx_i8_nxv8i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -329,8 +312,7 @@ define <vscale x 8 x i32> @vnsrl_wi_i32_nxv8i32_sext(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vnsrl_wi_i32_nxv8i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 15
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
   %vb = sext <vscale x 8 x i32> splat (i32 15) to <vscale x 8 x i64>
   %x = lshr <vscale x 8 x i64> %va, %vb
@@ -408,8 +390,7 @@ define <vscale x 2 x i32> @vnsrl_wv_nxv2i32_zext(<vscale x 2 x i64> %va, <vscale
 ; CHECK-LABEL: vnsrl_wv_nxv2i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnsrl.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
   %x = lshr <vscale x 2 x i64> %va, %vc
@@ -421,8 +402,7 @@ define <vscale x 2 x i32> @vnsrl_wx_i32_nxv2i32_zext(<vscale x 2 x i64> %va, i32
 ; CHECK-LABEL: vnsrl_wx_i32_nxv2i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -436,8 +416,7 @@ define <vscale x 2 x i32> @vnsrl_wx_i16_nxv2i32_zext(<vscale x 2 x i64> %va, i16
 ; CHECK-LABEL: vnsrl_wx_i16_nxv2i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -451,8 +430,7 @@ define <vscale x 2 x i32> @vnsrl_wx_i8_nxv2i32_zext(<vscale x 2 x i64> %va, i8 %
 ; CHECK-LABEL: vnsrl_wx_i8_nxv2i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -466,8 +444,7 @@ define <vscale x 2 x i32> @vnsrl_wi_i32_nxv2i32_zext(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vnsrl_wi_i32_nxv2i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 15
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
   %vb = zext <vscale x 2 x i32> splat (i32 15) to <vscale x 2 x i64>
   %x = lshr <vscale x 2 x i64> %va, %vb
@@ -479,8 +456,7 @@ define <vscale x 4 x i32> @vnsrl_wv_nxv4i32_zext(<vscale x 4 x i64> %va, <vscale
 ; CHECK-LABEL: vnsrl_wv_nxv4i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnsrl.wv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
   %x = lshr <vscale x 4 x i64> %va, %vc
@@ -492,8 +468,7 @@ define <vscale x 4 x i32> @vnsrl_wx_i32_nxv4i32_zext(<vscale x 4 x i64> %va, i32
 ; CHECK-LABEL: vnsrl_wx_i32_nxv4i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -507,8 +482,7 @@ define <vscale x 4 x i32> @vnsrl_wx_i16_nxv4i32_zext(<vscale x 4 x i64> %va, i16
 ; CHECK-LABEL: vnsrl_wx_i16_nxv4i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -522,8 +496,7 @@ define <vscale x 4 x i32> @vnsrl_wx_i8_nxv4i32_zext(<vscale x 4 x i64> %va, i8 %
 ; CHECK-LABEL: vnsrl_wx_i8_nxv4i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -537,8 +510,7 @@ define <vscale x 4 x i32> @vnsrl_wi_i32_nxv4i32_zext(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vnsrl_wi_i32_nxv4i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 15
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
   %vb = zext <vscale x 4 x i32> splat (i32 15) to <vscale x 4 x i64>
   %x = lshr <vscale x 4 x i64> %va, %vb
@@ -550,8 +522,7 @@ define <vscale x 8 x i32> @vnsrl_wv_nxv8i32_zext(<vscale x 8 x i64> %va, <vscale
 ; CHECK-LABEL: vnsrl_wv_nxv8i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnsrl.wv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
   %x = lshr <vscale x 8 x i64> %va, %vc
@@ -563,8 +534,7 @@ define <vscale x 8 x i32> @vnsrl_wx_i32_nxv8i32_zext(<vscale x 8 x i64> %va, i32
 ; CHECK-LABEL: vnsrl_wx_i32_nxv8i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -578,8 +548,7 @@ define <vscale x 8 x i32> @vnsrl_wx_i16_nxv8i32_zext(<vscale x 8 x i64> %va, i16
 ; CHECK-LABEL: vnsrl_wx_i16_nxv8i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -593,8 +562,7 @@ define <vscale x 8 x i32> @vnsrl_wx_i8_nxv8i32_zext(<vscale x 8 x i64> %va, i8 %
 ; CHECK-LABEL: vnsrl_wx_i8_nxv8i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -608,8 +576,7 @@ define <vscale x 8 x i32> @vnsrl_wi_i32_nxv8i32_zext(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vnsrl_wi_i32_nxv8i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 15
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
   %vb = zext <vscale x 8 x i32> splat (i32 15) to <vscale x 8 x i64>
   %x = lshr <vscale x 8 x i64> %va, %vb
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl.ll
index 116e199d324f8..a38ec76053341 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsrl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl.ll
@@ -155,8 +155,7 @@ define <vscale x 8 x i8> @intrinsic_vnsrl_wv_nxv8i8_nxv8i16_nxv8i8(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv8i8_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnsrl.wv v8, v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vnsrl.nxv8i8.nxv8i16.nxv8i8(
@@ -203,8 +202,7 @@ define <vscale x 16 x i8> @intrinsic_vnsrl_wv_nxv16i8_nxv16i16_nxv16i8(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv16i8_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnsrl.wv v8, v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vnsrl.nxv16i8.nxv16i16.nxv16i8(
@@ -251,8 +249,7 @@ define <vscale x 32 x i8> @intrinsic_vnsrl_wv_nxv32i8_nxv32i16_nxv32i8(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv32i8_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnsrl.wv v8, v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vnsrl.nxv32i8.nxv32i16.nxv32i8(
@@ -393,8 +390,7 @@ define <vscale x 4 x i16> @intrinsic_vnsrl_wv_nxv4i16_nxv4i32_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv4i16_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnsrl.wv v8, v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vnsrl.nxv4i16.nxv4i32.nxv4i16(
@@ -441,8 +437,7 @@ define <vscale x 8 x i16> @intrinsic_vnsrl_wv_nxv8i16_nxv8i32_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv8i16_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnsrl.wv v8, v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vnsrl.nxv8i16.nxv8i32.nxv8i16(
@@ -489,8 +484,7 @@ define <vscale x 16 x i16> @intrinsic_vnsrl_wv_nxv16i16_nxv16i32_nxv16i16(<vscal
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv16i16_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnsrl.wv v8, v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vnsrl.nxv16i16.nxv16i32.nxv16i16(
@@ -584,8 +578,7 @@ define <vscale x 2 x i32> @intrinsic_vnsrl_wv_nxv2i32_nxv2i64_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv2i32_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wv v11, v8, v10
-; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    vnsrl.wv v8, v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vnsrl.nxv2i32.nxv2i64.nxv2i32(
@@ -632,8 +625,7 @@ define <vscale x 4 x i32> @intrinsic_vnsrl_wv_nxv4i32_nxv4i64_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv4i32_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wv v14, v8, v12
-; CHECK-NEXT:    vmv.v.v v8, v14
+; CHECK-NEXT:    vnsrl.wv v8, v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vnsrl.nxv4i32.nxv4i64.nxv4i32(
@@ -680,8 +672,7 @@ define <vscale x 8 x i32> @intrinsic_vnsrl_wv_nxv8i32_nxv8i64_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv8i32_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wv v20, v8, v16
-; CHECK-NEXT:    vmv.v.v v8, v20
+; CHECK-NEXT:    vnsrl.wv v8, v8, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vnsrl.nxv8i32.nxv8i64.nxv8i32(
@@ -869,8 +860,7 @@ define <vscale x 8 x i8> @intrinsic_vnsrl_vx_nxv8i8_nxv8i16(<vscale x 8 x i16> %
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv8i8_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vnsrl.nxv8i8.nxv8i16(
@@ -917,8 +907,7 @@ define <vscale x 16 x i8> @intrinsic_vnsrl_vx_nxv16i8_nxv16i16(<vscale x 16 x i1
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vnsrl.nxv16i8.nxv16i16(
@@ -965,8 +954,7 @@ define <vscale x 32 x i8> @intrinsic_vnsrl_vx_nxv32i8_nxv32i16(<vscale x 32 x i1
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv32i8_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vnsrl.nxv32i8.nxv32i16(
@@ -1107,8 +1095,7 @@ define <vscale x 4 x i16> @intrinsic_vnsrl_vx_nxv4i16_nxv4i32(<vscale x 4 x i32>
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv4i16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vnsrl.nxv4i16.nxv4i32(
@@ -1155,8 +1142,7 @@ define <vscale x 8 x i16> @intrinsic_vnsrl_vx_nxv8i16_nxv8i32(<vscale x 8 x i32>
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vnsrl.nxv8i16.nxv8i32(
@@ -1203,8 +1189,7 @@ define <vscale x 16 x i16> @intrinsic_vnsrl_vx_nxv16i16_nxv16i32(<vscale x 16 x
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv16i16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vnsrl.nxv16i16.nxv16i32(
@@ -1298,8 +1283,7 @@ define <vscale x 2 x i32> @intrinsic_vnsrl_vx_nxv2i32_nxv2i64(<vscale x 2 x i64>
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wx v10, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vnsrl.nxv2i32.nxv2i64(
@@ -1346,8 +1330,7 @@ define <vscale x 4 x i32> @intrinsic_vnsrl_vx_nxv4i32_nxv4i64(<vscale x 4 x i64>
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv4i32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wx v12, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vnsrl.nxv4i32.nxv4i64(
@@ -1394,8 +1377,7 @@ define <vscale x 8 x i32> @intrinsic_vnsrl_vx_nxv8i32_nxv8i64(<vscale x 8 x i64>
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv8i32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wx v16, v8, a0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vnsrl.nxv8i32.nxv8i64(
@@ -1535,8 +1517,7 @@ define <vscale x 8 x i8> @intrinsic_vnsrl_vi_nxv8i8_nxv8i16_i8(<vscale x 8 x i16
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv8i8_nxv8i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vnsrl.nxv8i8.nxv8i16(
@@ -1569,8 +1550,7 @@ define <vscale x 16 x i8> @intrinsic_vnsrl_vi_nxv16i8_nxv16i16_i8(<vscale x 16 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv16i8_nxv16i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vnsrl.nxv16i8.nxv16i16(
@@ -1603,8 +1583,7 @@ define <vscale x 32 x i8> @intrinsic_vnsrl_vi_nxv32i8_nxv32i16_i8(<vscale x 32 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv32i8_nxv32i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vnsrl.nxv32i8.nxv32i16(
@@ -1703,8 +1682,7 @@ define <vscale x 4 x i16> @intrinsic_vnsrl_vi_nxv4i16_nxv4i32_i16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv4i16_nxv4i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vnsrl.nxv4i16.nxv4i32(
@@ -1737,8 +1715,7 @@ define <vscale x 8 x i16> @intrinsic_vnsrl_vi_nxv8i16_nxv8i32_i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv8i16_nxv8i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vnsrl.nxv8i16.nxv8i32(
@@ -1771,8 +1748,7 @@ define <vscale x 16 x i16> @intrinsic_vnsrl_vi_nxv16i16_nxv16i32_i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv16i16_nxv16i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vnsrl.nxv16i16.nxv16i32(
@@ -1838,8 +1814,7 @@ define <vscale x 2 x i32> @intrinsic_vnsrl_vi_nxv2i32_nxv2i64_i32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv2i32_nxv2i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vnsrl.nxv2i32.nxv2i64(
@@ -1872,8 +1847,7 @@ define <vscale x 4 x i32> @intrinsic_vnsrl_vi_nxv4i32_nxv4i64_i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv4i32_nxv4i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vnsrl.nxv4i32.nxv4i64(
@@ -1906,8 +1880,7 @@ define <vscale x 8 x i32> @intrinsic_vnsrl_vi_nxv8i32_nxv8i64_i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv8i32_nxv8i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 9
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vnsrl.nxv8i32.nxv8i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll b/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll
index b316f5f878816..718eee50269c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll
@@ -108,8 +108,8 @@ define iXLen @nxv2i64(<vscale x 2 x i64> %src, <vscale x 2 x i1> %m, i32 %evl) {
 ; RV32-LABEL: nxv2i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmsne.vi v10, v8, 0, v0.t
-; RV32-NEXT:    vfirst.m a1, v10, v0.t
+; RV32-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV32-NEXT:    vfirst.m a1, v8, v0.t
 ; RV32-NEXT:    bltz a1, .LBB4_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a0, a1
@@ -121,8 +121,8 @@ define iXLen @nxv2i64(<vscale x 2 x i64> %src, <vscale x 2 x i1> %m, i32 %evl) {
 ; RV64-NEXT:    slli a0, a0, 32
 ; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vmsne.vi v10, v8, 0, v0.t
-; RV64-NEXT:    vfirst.m a1, v10, v0.t
+; RV64-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV64-NEXT:    vfirst.m a1, v8, v0.t
 ; RV64-NEXT:    bltz a1, .LBB4_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a0, a1
@@ -136,8 +136,8 @@ define iXLen @nxv2i64_zero_poison(<vscale x 2 x i64> %src, <vscale x 2 x i1> %m,
 ; RV32-LABEL: nxv2i64_zero_poison:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmsne.vi v10, v8, 0, v0.t
-; RV32-NEXT:    vfirst.m a0, v10, v0.t
+; RV32-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV32-NEXT:    vfirst.m a0, v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: nxv2i64_zero_poison:
@@ -145,8 +145,8 @@ define iXLen @nxv2i64_zero_poison(<vscale x 2 x i64> %src, <vscale x 2 x i1> %m,
 ; RV64-NEXT:    slli a0, a0, 32
 ; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vmsne.vi v10, v8, 0, v0.t
-; RV64-NEXT:    vfirst.m a0, v10, v0.t
+; RV64-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV64-NEXT:    vfirst.m a0, v8, v0.t
 ; RV64-NEXT:    ret
   %r = call iXLen @llvm.vp.cttz.elts.iXLen.nxv2i64(<vscale x 2 x i64> %src, i1 1, <vscale x 2 x i1> %m, i32 %evl)
   ret iXLen %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-inttoptr-ptrtoint.ll b/llvm/test/CodeGen/RISCV/rvv/vp-inttoptr-ptrtoint.ll
index df003907dc360..0cf1ed69c5f72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-inttoptr-ptrtoint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-inttoptr-ptrtoint.ll
@@ -20,8 +20,8 @@ define <vscale x 4 x ptr> @inttoptr_nxv4p0_nxv4i16(<vscale x 4 x i16> %va, <vsca
 ; CHECK-LABEL: inttoptr_nxv4p0_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vzext.vf4 v8, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vzext.vf4 v8, v11, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x ptr> @llvm.vp.inttoptr.nxv4p0.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x ptr> %v
@@ -33,8 +33,8 @@ define <vscale x 4 x ptr> @inttoptr_nxv4p0_nxv4i32(<vscale x 4 x i32> %va, <vsca
 ; CHECK-LABEL: inttoptr_nxv4p0_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12, v0.t
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vzext.vf2 v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x ptr> @llvm.vp.inttoptr.nxv4p0.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x ptr> %v
@@ -56,9 +56,9 @@ define <vscale x 4 x i8> @ptrtoint_nxv4i8_nxv4p0(<vscale x 4 x ptr> %va, <vscale
 ; CHECK-LABEL: ptrtoint_nxv4i8_nxv4p0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -72,9 +72,9 @@ define <vscale x 4 x i16> @ptrtoint_nxv4i16_nxv4p0(<vscale x 4 x ptr> %va, <vsca
 ; CHECK-LABEL: ptrtoint_nxv4i16_nxv4p0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x i16> @llvm.vp.ptrtoint.nxv4i16.nxv4p0(<vscale x 4 x ptr> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i16> %v
@@ -86,8 +86,7 @@ define <vscale x 4 x i32> @ptrtoint_nxv4i32_nxv4p0(<vscale x 4 x ptr> %va, <vsca
 ; CHECK-LABEL: ptrtoint_nxv4i32_nxv4p0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.vp.ptrtoint.nxv4i32.nxv4p0(<vscale x 4 x ptr> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i32> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
index 8e44d76e7010f..debeb66526169 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
@@ -170,8 +170,7 @@ define <vscale x 16 x i1> @test_vp_reverse_nxv16i1_masked(<vscale x 16 x i1> %sr
 ; CHECK-NEXT:    vrsub.vx v12, v12, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vrgatherei16.vv v16, v10, v12, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vi v0, v16, 0, v0.t
 ; CHECK-NEXT:    ret
   %dst = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> %src, <vscale x 16 x i1> %mask, i32 %evl)
   ret <vscale x 16 x i1> %dst
@@ -210,8 +209,7 @@ define <vscale x 32 x i1> @test_vp_reverse_nxv32i1_masked(<vscale x 32 x i1> %sr
 ; CHECK-NEXT:    vrsub.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vrgatherei16.vv v24, v12, v16, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v24, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vi v0, v24, 0, v0.t
 ; CHECK-NEXT:    ret
   %dst = call <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1> %src, <vscale x 32 x i1> %mask, i32 %evl)
   ret <vscale x 32 x i1> %dst
@@ -264,8 +262,7 @@ define <vscale x 64 x i1> @test_vp_reverse_nxv64i1_masked(<vscale x 64 x i1> %sr
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v16, v24, a1, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vi v0, v16, 0, v0.t
 ; CHECK-NEXT:    ret
   %dst = call <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1> %src, <vscale x 64 x i1> %mask, i32 %evl)
   ret <vscale x 64 x i1> %dst
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splat-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splat-mask.ll
index e970758a2610b..e0fc4fd3b2fc6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-splat-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-splat-mask.ll
@@ -70,9 +70,8 @@ define <vscale x 16 x i1> @vp_splat_nxv16i1(i1 %val, <vscale x 16 x i1> %m, i32
 ; CHECK-LABEL: vp_splat_nxv16i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %splat = call <vscale x 16 x i1> @llvm.experimental.vp.splat.nxv16i1(i1 %val, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i1> %splat
@@ -82,9 +81,8 @@ define <vscale x 32 x i1> @vp_splat_nxv32i1(i1 %val, <vscale x 32 x i1> %m, i32
 ; CHECK-LABEL: vp_splat_nxv32i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vmsne.vi v8, v12, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %splat = call <vscale x 32 x i1> @llvm.experimental.vp.splat.nxv32i1(i1 %val, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x i1> %splat
@@ -94,9 +92,8 @@ define <vscale x 64 x i1> @vp_splat_nxv64i1(i1 %val, <vscale x 64 x i1> %m, i32
 ; CHECK-LABEL: vp_splat_nxv64i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %splat = call <vscale x 64 x i1> @llvm.experimental.vp.splat.nxv64i1(i1 %val, <vscale x 64 x i1> %m, i32 %evl)
   ret <vscale x 64 x i1> %splat
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll
index 3b0b183537468..d8a1c12b56960 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll
@@ -369,8 +369,7 @@ define <vscale x 16 x i1> @test_vp_splice_nxv16i1_masked(<vscale x 16 x i1> %va,
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vslideup.vx v10, v12, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vi v0, v10, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x i1> @llvm.experimental.vp.splice.nxv16i1(<vscale x 16 x i1> %va, <vscale x 16 x i1> %vb, i32 5, <vscale x 16 x i1> %mask, i32 %evla, i32 %evlb)
   ret <vscale x 16 x i1> %v
@@ -443,8 +442,7 @@ define <vscale x 32 x i1> @test_vp_splice_nxv32i1_masked(<vscale x 32 x i1> %va,
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vslideup.vx v16, v12, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vi v0, v16, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x i1> @llvm.experimental.vp.splice.nxv32i1(<vscale x 32 x i1> %va, <vscale x 32 x i1> %vb, i32 5, <vscale x 32 x i1> %mask, i32 %evla, i32 %evlb)
   ret <vscale x 32 x i1> %v
@@ -517,8 +515,7 @@ define <vscale x 64 x i1> @test_vp_splice_nxv64i1_masked(<vscale x 64 x i1> %va,
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT:    vslideup.vx v24, v16, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m8, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v24, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmsne.vi v0, v24, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 64 x i1> @llvm.experimental.vp.splice.nxv64i1(<vscale x 64 x i1> %va, <vscale x 64 x i1> %vb, i32 5, <vscale x 64 x i1> %mask, i32 %evla, i32 %evlb)
   ret <vscale x 64 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 1868154052272..8e80090dfda4e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -747,10 +747,10 @@ define {<vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1
 ; RV32-NEXT:    li a0, 32
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vnsrl.wx v8, v12, a0
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wi v12, v12, 0
 ; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v10, v16, 0
-; RV32-NEXT:    vnsrl.wx v11, v16, a0
+; RV32-NEXT:    vnsrl.wi v10, v12, 0
+; RV32-NEXT:    vnsrl.wx v11, v12, a0
 ; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; RV32-NEXT:    vnsrl.wx v12, v11, a0
 ; RV32-NEXT:    vnsrl.wi v11, v11, 0
@@ -765,10 +765,10 @@ define {<vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1
 ; RV64-NEXT:    li a0, 32
 ; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vnsrl.wx v8, v12, a0
-; RV64-NEXT:    vnsrl.wi v16, v12, 0
+; RV64-NEXT:    vnsrl.wi v12, v12, 0
 ; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v10, v16, 0
-; RV64-NEXT:    vnsrl.wx v11, v16, a0
+; RV64-NEXT:    vnsrl.wi v10, v12, 0
+; RV64-NEXT:    vnsrl.wx v11, v12, a0
 ; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; RV64-NEXT:    vnsrl.wx v12, v11, a0
 ; RV64-NEXT:    vnsrl.wi v11, v11, 0
@@ -805,10 +805,10 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32>
 ; RV32-NEXT:    vsetvli a4, zero, e32, m1, ta, ma
 ; RV32-NEXT:    vslidedown.vx v8, v12, a3
 ; RV32-NEXT:    vslideup.vx v12, v8, a3
-; RV32-NEXT:    vwaddu.vv v16, v12, v9
-; RV32-NEXT:    vwmaccu.vx v16, a2, v9
+; RV32-NEXT:    vwaddu.vv v14, v12, v9
+; RV32-NEXT:    vwmaccu.vx v14, a2, v9
 ; RV32-NEXT:    vsetvli a3, zero, e32, m2, ta, ma
-; RV32-NEXT:    vwaddu.vv v12, v16, v10
+; RV32-NEXT:    vwaddu.vv v12, v14, v10
 ; RV32-NEXT:    vwmaccu.vx v12, a2, v10
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vse32.v v12, (a0)
@@ -826,10 +826,10 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32>
 ; RV64-NEXT:    vsetvli a4, zero, e32, m1, ta, ma
 ; RV64-NEXT:    vslidedown.vx v8, v12, a3
 ; RV64-NEXT:    vslideup.vx v12, v8, a3
-; RV64-NEXT:    vwaddu.vv v16, v12, v9
-; RV64-NEXT:    vwmaccu.vx v16, a2, v9
+; RV64-NEXT:    vwaddu.vv v14, v12, v9
+; RV64-NEXT:    vwmaccu.vx v14, a2, v9
 ; RV64-NEXT:    vsetvli a3, zero, e32, m2, ta, ma
-; RV64-NEXT:    vwaddu.vv v12, v16, v10
+; RV64-NEXT:    vwaddu.vv v12, v14, v10
 ; RV64-NEXT:    vwmaccu.vx v12, a2, v10
 ; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
@@ -937,10 +937,10 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 ; RV32-NEXT:    li a0, 32
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vnsrl.wx v12, v8, a0
-; RV32-NEXT:    vnsrl.wi v14, v8, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT:    vnsrl.wx v10, v14, a0
-; RV32-NEXT:    vnsrl.wi v8, v14, 0
+; RV32-NEXT:    vnsrl.wx v10, v8, a0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vnsrl.wx v11, v12, a0
 ; RV32-NEXT:    vnsrl.wi v9, v12, 0
 ; RV32-NEXT:    ret
@@ -955,10 +955,10 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 ; RV64-NEXT:    li a0, 32
 ; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vnsrl.wx v12, v8, a0
-; RV64-NEXT:    vnsrl.wi v14, v8, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT:    vnsrl.wx v10, v14, a0
-; RV64-NEXT:    vnsrl.wi v8, v14, 0
+; RV64-NEXT:    vnsrl.wx v10, v8, a0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vnsrl.wx v11, v12, a0
 ; RV64-NEXT:    vnsrl.wi v9, v12, 0
 ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index b6ec7906885ff..0241e78f372e3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -14,15 +14,13 @@ define <vscale x 1 x i8> @vpgather_nxv1i8(<vscale x 1 x ptr> %ptrs, <vscale x 1
 ; RV32-LABEL: vpgather_nxv1i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv1i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i8> %v
@@ -34,15 +32,13 @@ define <vscale x 2 x i8> @vpgather_nxv2i8(<vscale x 2 x ptr> %ptrs, <vscale x 2
 ; RV32-LABEL: vpgather_nxv2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i8> %v
@@ -180,15 +176,13 @@ define <vscale x 4 x i8> @vpgather_nxv4i8(<vscale x 4 x ptr> %ptrs, <vscale x 4
 ; RV32-LABEL: vpgather_nxv4i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv4i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i8> %v
@@ -198,15 +192,13 @@ define <vscale x 4 x i8> @vpgather_truemask_nxv4i8(<vscale x 4 x ptr> %ptrs, i32
 ; RV32-LABEL: vpgather_truemask_nxv4i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8
-; RV32-NEXT:    vmv1r.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_nxv4i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8
-; RV64-NEXT:    vmv1r.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
   ret <vscale x 4 x i8> %v
@@ -218,15 +210,13 @@ define <vscale x 8 x i8> @vpgather_nxv8i8(<vscale x 8 x ptr> %ptrs, <vscale x 8
 ; RV32-LABEL: vpgather_nxv8i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv8i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v16
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i8> %v
@@ -350,15 +340,13 @@ define <vscale x 1 x i16> @vpgather_nxv1i16(<vscale x 1 x ptr> %ptrs, <vscale x
 ; RV32-LABEL: vpgather_nxv1i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv1i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i16> %v
@@ -370,15 +358,13 @@ define <vscale x 2 x i16> @vpgather_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x
 ; RV32-LABEL: vpgather_nxv2i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv2i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
@@ -474,15 +460,13 @@ define <vscale x 4 x i16> @vpgather_nxv4i16(<vscale x 4 x ptr> %ptrs, <vscale x
 ; RV32-LABEL: vpgather_nxv4i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv4i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i16> %v
@@ -492,15 +476,13 @@ define <vscale x 4 x i16> @vpgather_truemask_nxv4i16(<vscale x 4 x ptr> %ptrs, i
 ; RV32-LABEL: vpgather_truemask_nxv4i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_nxv4i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
   ret <vscale x 4 x i16> %v
@@ -512,15 +494,13 @@ define <vscale x 8 x i16> @vpgather_nxv8i16(<vscale x 8 x ptr> %ptrs, <vscale x
 ; RV32-LABEL: vpgather_nxv8i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv8i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v16
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i16> %v
@@ -531,18 +511,18 @@ define <vscale x 8 x i16> @vpgather_baseidx_nxv8i8_nxv8i16(ptr %base, <vscale x
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vadd.vv v12, v12, v12
+; RV32-NEXT:    vadd.vv v8, v12, v12
 ; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vadd.vv v16, v16, v16
+; RV64-NEXT:    vadd.vv v8, v16, v16
 ; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
   %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -554,18 +534,18 @@ define <vscale x 8 x i16> @vpgather_baseidx_sext_nxv8i8_nxv8i16(ptr %base, <vsca
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vadd.vv v12, v12, v12
+; RV32-NEXT:    vadd.vv v8, v12, v12
 ; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vadd.vv v16, v16, v16
+; RV64-NEXT:    vadd.vv v8, v16, v16
 ; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
   %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
@@ -607,9 +587,9 @@ define <vscale x 8 x i16> @vpgather_baseidx_nxv8i16(ptr %base, <vscale x 8 x i16
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
-; RV64-NEXT:    vadd.vv v16, v16, v16
+; RV64-NEXT:    vadd.vv v8, v16, v16
 ; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
   %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -628,8 +608,7 @@ define <vscale x 1 x i32> @vpgather_nxv1i32(<vscale x 1 x ptr> %ptrs, <vscale x
 ; RV64-LABEL: vpgather_nxv1i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i32> %v
@@ -647,8 +626,7 @@ define <vscale x 2 x i32> @vpgather_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x
 ; RV64-LABEL: vpgather_nxv2i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i32> %v
@@ -658,9 +636,9 @@ define <vscale x 2 x i64> @vpgather_nxv2i32_sextload_nxv2i64(<vscale x 2 x ptr>
 ; RV32-LABEL: vpgather_nxv2i32_sextload_nxv2i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
+; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
 ; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32-NEXT:    vsext.vf2 v8, v10
+; RV32-NEXT:    vsext.vf2 v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv2i32_sextload_nxv2i64:
@@ -679,9 +657,9 @@ define <vscale x 2 x i64> @vpgather_nxv2i32_zextload_nxv2i64(<vscale x 2 x ptr>
 ; RV32-LABEL: vpgather_nxv2i32_zextload_nxv2i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
+; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
 ; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32-NEXT:    vzext.vf2 v8, v10
+; RV32-NEXT:    vzext.vf2 v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv2i32_zextload_nxv2i64:
@@ -708,8 +686,7 @@ define <vscale x 4 x i32> @vpgather_nxv4i32(<vscale x 4 x ptr> %ptrs, <vscale x
 ; RV64-LABEL: vpgather_nxv4i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i32> %v
@@ -725,8 +702,7 @@ define <vscale x 4 x i32> @vpgather_truemask_nxv4i32(<vscale x 4 x ptr> %ptrs, i
 ; RV64-LABEL: vpgather_truemask_nxv4i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
   ret <vscale x 4 x i32> %v
@@ -744,8 +720,7 @@ define <vscale x 8 x i32> @vpgather_nxv8i32(<vscale x 8 x ptr> %ptrs, <vscale x
 ; RV64-LABEL: vpgather_nxv8i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v16
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i32> %v
@@ -764,9 +739,9 @@ define <vscale x 8 x i32> @vpgather_baseidx_nxv8i8_nxv8i32(ptr %base, <vscale x
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsll.vi v8, v16, 2
 ; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
   %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -786,9 +761,9 @@ define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, <vsca
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsll.vi v8, v16, 2
 ; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
@@ -801,18 +776,18 @@ define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vsca
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 4
 ; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV32-NEXT:    vwmulu.vx v12, v8, a2
+; RV32-NEXT:    vwmulu.vx v10, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 4
 ; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV64-NEXT:    vwmulu.vx v12, v8, a2
+; RV64-NEXT:    vwmulu.vx v10, v8, a2
 ; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
@@ -834,9 +809,9 @@ define <vscale x 8 x i32> @vpgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
-; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsll.vi v8, v16, 2
 ; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
   %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -857,9 +832,9 @@ define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vsc
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
-; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsll.vi v8, v16, 2
 ; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
   %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
@@ -936,8 +911,8 @@ define <vscale x 2 x i64> @vpgather_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x
 ; RV32-LABEL: vpgather_nxv2i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmv1r.v v10, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v10, v0.t
+; RV32-NEXT:    vmv1r.v v9, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv2i64:
@@ -955,8 +930,8 @@ define <vscale x 4 x i64> @vpgather_nxv4i64(<vscale x 4 x ptr> %ptrs, <vscale x
 ; RV32-LABEL: vpgather_nxv4i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v12, v0.t
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv4i64:
@@ -972,8 +947,8 @@ define <vscale x 4 x i64> @vpgather_truemask_nxv4i64(<vscale x 4 x ptr> %ptrs, i
 ; RV32-LABEL: vpgather_truemask_nxv4i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v12
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_nxv4i64:
@@ -991,8 +966,8 @@ define <vscale x 8 x i64> @vpgather_nxv8i64(<vscale x 8 x ptr> %ptrs, <vscale x
 ; RV32-LABEL: vpgather_nxv8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmv4r.v v16, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v16, v0.t
+; RV32-NEXT:    vmv4r.v v12, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv8i64:
@@ -1009,9 +984,9 @@ define <vscale x 8 x i64> @vpgather_baseidx_nxv8i8_nxv8i64(ptr %base, <vscale x
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vsll.vi v16, v12, 3
+; RV32-NEXT:    vsll.vi v12, v12, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
@@ -1031,9 +1006,9 @@ define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, <vsca
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vsll.vi v16, v12, 3
+; RV32-NEXT:    vsll.vi v12, v12, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
@@ -1054,18 +1029,18 @@ define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vsca
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV32-NEXT:    vwmulu.vx v16, v8, a2
+; RV32-NEXT:    vwmulu.vx v14, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei16.v v8, (a0), v14, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 8
 ; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV64-NEXT:    vwmulu.vx v16, v8, a2
+; RV64-NEXT:    vwmulu.vx v14, v8, a2
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei16.v v8, (a0), v14, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
@@ -1078,9 +1053,9 @@ define <vscale x 8 x i64> @vpgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; RV32-NEXT:    vwmulsu.vx v16, v8, a2
+; RV32-NEXT:    vwmulsu.vx v12, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
@@ -1100,9 +1075,9 @@ define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vsc
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; RV32-NEXT:    vwmulsu.vx v16, v8, a2
+; RV32-NEXT:    vwmulsu.vx v12, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
@@ -1123,18 +1098,18 @@ define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, <vsc
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; RV32-NEXT:    vwmulu.vx v16, v8, a2
+; RV32-NEXT:    vwmulu.vx v12, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 8
 ; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; RV64-NEXT:    vwmulu.vx v16, v8, a2
+; RV64-NEXT:    vwmulu.vx v12, v8, a2
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
   %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
@@ -1146,9 +1121,9 @@ define <vscale x 8 x i64> @vpgather_baseidx_nxv8i32_nxv8i64(ptr %base, <vscale x
 ; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v16, v8, 3
+; RV32-NEXT:    vsll.vi v12, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
@@ -1168,9 +1143,9 @@ define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, <vsc
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v16, v8, 3
+; RV32-NEXT:    vsll.vi v12, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
@@ -1191,9 +1166,9 @@ define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, <vsc
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v16, v8, 3
+; RV32-NEXT:    vsll.vi v12, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
@@ -1214,10 +1189,10 @@ define <vscale x 8 x i64> @vpgather_baseidx_nxv8i64(ptr %base, <vscale x 8 x i64
 ; RV32-LABEL: vpgather_baseidx_nxv8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
-; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsll.vi v12, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i64:
@@ -1237,15 +1212,13 @@ define <vscale x 1 x bfloat> @vpgather_nxv1bf16(<vscale x 1 x ptr> %ptrs, <vscal
 ; RV32-LABEL: vpgather_nxv1bf16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv1bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.gather.nxv1bf16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -1257,15 +1230,13 @@ define <vscale x 2 x bfloat> @vpgather_nxv2bf16(<vscale x 2 x ptr> %ptrs, <vscal
 ; RV32-LABEL: vpgather_nxv2bf16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv2bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.gather.nxv2bf16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -1277,15 +1248,13 @@ define <vscale x 4 x bfloat> @vpgather_nxv4bf16(<vscale x 4 x ptr> %ptrs, <vscal
 ; RV32-LABEL: vpgather_nxv4bf16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv4bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -1295,15 +1264,13 @@ define <vscale x 4 x bfloat> @vpgather_truemask_nxv4bf16(<vscale x 4 x ptr> %ptr
 ; RV32-LABEL: vpgather_truemask_nxv4bf16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_nxv4bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.vp.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
   ret <vscale x 4 x bfloat> %v
@@ -1315,15 +1282,13 @@ define <vscale x 8 x bfloat> @vpgather_nxv8bf16(<vscale x 8 x ptr> %ptrs, <vscal
 ; RV32-LABEL: vpgather_nxv8bf16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv8bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v16
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x bfloat> %v
@@ -1334,18 +1299,18 @@ define <vscale x 8 x bfloat> @vpgather_baseidx_nxv8i8_nxv8bf16(ptr %base, <vscal
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vadd.vv v12, v12, v12
+; RV32-NEXT:    vadd.vv v8, v12, v12
 ; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vadd.vv v16, v16, v16
+; RV64-NEXT:    vadd.vv v8, v16, v16
 ; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs
   %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -1357,18 +1322,18 @@ define <vscale x 8 x bfloat> @vpgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, <
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vadd.vv v12, v12, v12
+; RV32-NEXT:    vadd.vv v8, v12, v12
 ; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vadd.vv v16, v16, v16
+; RV64-NEXT:    vadd.vv v8, v16, v16
 ; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
   %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
@@ -1410,9 +1375,9 @@ define <vscale x 8 x bfloat> @vpgather_baseidx_nxv8bf16(ptr %base, <vscale x 8 x
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
-; RV64-NEXT:    vadd.vv v16, v16, v16
+; RV64-NEXT:    vadd.vv v8, v16, v16
 ; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs
   %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -1425,15 +1390,13 @@ define <vscale x 1 x half> @vpgather_nxv1f16(<vscale x 1 x ptr> %ptrs, <vscale x
 ; RV32-LABEL: vpgather_nxv1f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv1f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.vp.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x half> %v
@@ -1445,15 +1408,13 @@ define <vscale x 2 x half> @vpgather_nxv2f16(<vscale x 2 x ptr> %ptrs, <vscale x
 ; RV32-LABEL: vpgather_nxv2f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
-; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv2f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -1465,15 +1426,13 @@ define <vscale x 4 x half> @vpgather_nxv4f16(<vscale x 4 x ptr> %ptrs, <vscale x
 ; RV32-LABEL: vpgather_nxv4f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv4f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x half> %v
@@ -1483,15 +1442,13 @@ define <vscale x 4 x half> @vpgather_truemask_nxv4f16(<vscale x 4 x ptr> %ptrs,
 ; RV32-LABEL: vpgather_truemask_nxv4f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (zero), v8
-; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    vluxei32.v v8, (zero), v8
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_nxv4f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
   ret <vscale x 4 x half> %v
@@ -1503,15 +1460,13 @@ define <vscale x 8 x half> @vpgather_nxv8f16(<vscale x 8 x ptr> %ptrs, <vscale x
 ; RV32-LABEL: vpgather_nxv8f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v12
+; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv8f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v16
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
@@ -1522,18 +1477,18 @@ define <vscale x 8 x half> @vpgather_baseidx_nxv8i8_nxv8f16(ptr %base, <vscale x
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vadd.vv v12, v12, v12
+; RV32-NEXT:    vadd.vv v8, v12, v12
 ; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vadd.vv v16, v16, v16
+; RV64-NEXT:    vadd.vv v8, v16, v16
 ; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
   %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -1545,18 +1500,18 @@ define <vscale x 8 x half> @vpgather_baseidx_sext_nxv8i8_nxv8f16(ptr %base, <vsc
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vadd.vv v12, v12, v12
+; RV32-NEXT:    vadd.vv v8, v12, v12
 ; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vadd.vv v16, v16, v16
+; RV64-NEXT:    vadd.vv v8, v16, v16
 ; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
   %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
@@ -1598,9 +1553,9 @@ define <vscale x 8 x half> @vpgather_baseidx_nxv8f16(ptr %base, <vscale x 8 x i1
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
-; RV64-NEXT:    vadd.vv v16, v16, v16
+; RV64-NEXT:    vadd.vv v8, v16, v16
 ; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
   %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -1619,8 +1574,7 @@ define <vscale x 1 x float> @vpgather_nxv1f32(<vscale x 1 x ptr> %ptrs, <vscale
 ; RV64-LABEL: vpgather_nxv1f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
-; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x float> %v
@@ -1638,8 +1592,7 @@ define <vscale x 2 x float> @vpgather_nxv2f32(<vscale x 2 x ptr> %ptrs, <vscale
 ; RV64-LABEL: vpgather_nxv2f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x float> %v
@@ -1657,8 +1610,7 @@ define <vscale x 4 x float> @vpgather_nxv4f32(<vscale x 4 x ptr> %ptrs, <vscale
 ; RV64-LABEL: vpgather_nxv4f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1674,8 +1626,7 @@ define <vscale x 4 x float> @vpgather_truemask_nxv4f32(<vscale x 4 x ptr> %ptrs,
 ; RV64-LABEL: vpgather_truemask_nxv4f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vluxei64.v v12, (zero), v8
-; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    vluxei64.v v8, (zero), v8
 ; RV64-NEXT:    ret
   %v = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
   ret <vscale x 4 x float> %v
@@ -1693,8 +1644,7 @@ define <vscale x 8 x float> @vpgather_nxv8f32(<vscale x 8 x ptr> %ptrs, <vscale
 ; RV64-LABEL: vpgather_nxv8f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v16
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    ret
   %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x float> %v
@@ -1713,9 +1663,9 @@ define <vscale x 8 x float> @vpgather_baseidx_nxv8i8_nxv8f32(ptr %base, <vscale
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsll.vi v8, v16, 2
 ; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
   %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -1735,9 +1685,9 @@ define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, <vs
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsll.vi v8, v16, 2
 ; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
@@ -1750,18 +1700,18 @@ define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vs
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 4
 ; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV32-NEXT:    vwmulu.vx v12, v8, a2
+; RV32-NEXT:    vwmulu.vx v10, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
+; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 4
 ; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV64-NEXT:    vwmulu.vx v12, v8, a2
+; RV64-NEXT:    vwmulu.vx v10, v8, a2
 ; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
+; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
@@ -1783,9 +1733,9 @@ define <vscale x 8 x float> @vpgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
-; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsll.vi v8, v16, 2
 ; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
   %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -1806,9 +1756,9 @@ define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <v
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsext.vf4 v16, v8
-; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsll.vi v8, v16, 2
 ; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
   %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
@@ -1885,8 +1835,8 @@ define <vscale x 2 x double> @vpgather_nxv2f64(<vscale x 2 x ptr> %ptrs, <vscale
 ; RV32-LABEL: vpgather_nxv2f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmv1r.v v10, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v10, v0.t
+; RV32-NEXT:    vmv1r.v v9, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv2f64:
@@ -1904,8 +1854,8 @@ define <vscale x 4 x double> @vpgather_nxv4f64(<vscale x 4 x ptr> %ptrs, <vscale
 ; RV32-LABEL: vpgather_nxv4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v12, v0.t
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv4f64:
@@ -1921,8 +1871,8 @@ define <vscale x 4 x double> @vpgather_truemask_nxv4f64(<vscale x 4 x ptr> %ptrs
 ; RV32-LABEL: vpgather_truemask_nxv4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v12
+; RV32-NEXT:    vmv2r.v v10, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_truemask_nxv4f64:
@@ -1940,8 +1890,8 @@ define <vscale x 6 x double> @vpgather_nxv6f64(<vscale x 6 x ptr> %ptrs, <vscale
 ; RV32-LABEL: vpgather_nxv6f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmv4r.v v16, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v16, v0.t
+; RV32-NEXT:    vmv4r.v v12, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv6f64:
@@ -1958,9 +1908,9 @@ define <vscale x 6 x double> @vpgather_baseidx_nxv6i8_nxv6f64(ptr %base, <vscale
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vsll.vi v16, v12, 3
+; RV32-NEXT:    vsll.vi v12, v12, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
@@ -1980,9 +1930,9 @@ define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i8_nxv6f64(ptr %base, <v
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vsll.vi v16, v12, 3
+; RV32-NEXT:    vsll.vi v12, v12, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
@@ -2003,18 +1953,18 @@ define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, <v
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV32-NEXT:    vwmulu.vx v16, v8, a2
+; RV32-NEXT:    vwmulu.vx v14, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei16.v v8, (a0), v14, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 8
 ; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV64-NEXT:    vwmulu.vx v16, v8, a2
+; RV64-NEXT:    vwmulu.vx v14, v8, a2
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei16.v v8, (a0), v14, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
@@ -2027,9 +1977,9 @@ define <vscale x 6 x double> @vpgather_baseidx_nxv6i16_nxv6f64(ptr %base, <vscal
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; RV32-NEXT:    vwmulsu.vx v16, v8, a2
+; RV32-NEXT:    vwmulsu.vx v12, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
@@ -2049,9 +1999,9 @@ define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i16_nxv6f64(ptr %base, <
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; RV32-NEXT:    vwmulsu.vx v16, v8, a2
+; RV32-NEXT:    vwmulsu.vx v12, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
@@ -2072,18 +2022,18 @@ define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i16_nxv6f64(ptr %base, <
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; RV32-NEXT:    vwmulu.vx v16, v8, a2
+; RV32-NEXT:    vwmulu.vx v12, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 8
 ; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; RV64-NEXT:    vwmulu.vx v16, v8, a2
+; RV64-NEXT:    vwmulu.vx v12, v8, a2
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
@@ -2095,9 +2045,9 @@ define <vscale x 6 x double> @vpgather_baseidx_nxv6i32_nxv6f64(ptr %base, <vscal
 ; RV32-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v16, v8, 3
+; RV32-NEXT:    vsll.vi v12, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
@@ -2117,9 +2067,9 @@ define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i32_nxv6f64(ptr %base, <
 ; RV32-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v16, v8, 3
+; RV32-NEXT:    vsll.vi v12, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
@@ -2140,9 +2090,9 @@ define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i32_nxv6f64(ptr %base, <
 ; RV32-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v16, v8, 3
+; RV32-NEXT:    vsll.vi v12, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
@@ -2163,10 +2113,10 @@ define <vscale x 6 x double> @vpgather_baseidx_nxv6f64(ptr %base, <vscale x 6 x
 ; RV32-LABEL: vpgather_baseidx_nxv6f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
-; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsll.vi v12, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv6f64:
@@ -2186,8 +2136,8 @@ define <vscale x 8 x double> @vpgather_nxv8f64(<vscale x 8 x ptr> %ptrs, <vscale
 ; RV32-LABEL: vpgather_nxv8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmv4r.v v16, v8
-; RV32-NEXT:    vluxei32.v v8, (zero), v16, v0.t
+; RV32-NEXT:    vmv4r.v v12, v8
+; RV32-NEXT:    vluxei32.v v8, (zero), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_nxv8f64:
@@ -2204,9 +2154,9 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8i8_nxv8f64(ptr %base, <vscale
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vsll.vi v16, v12, 3
+; RV32-NEXT:    vsll.vi v12, v12, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
@@ -2226,9 +2176,9 @@ define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, <v
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
 ; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vsll.vi v16, v12, 3
+; RV32-NEXT:    vsll.vi v12, v12, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
@@ -2249,18 +2199,18 @@ define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <v
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV32-NEXT:    vwmulu.vx v16, v8, a2
+; RV32-NEXT:    vwmulu.vx v14, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei16.v v8, (a0), v14, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 8
 ; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV64-NEXT:    vwmulu.vx v16, v8, a2
+; RV64-NEXT:    vwmulu.vx v14, v8, a2
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei16.v v8, (a0), v14, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
@@ -2273,9 +2223,9 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscal
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; RV32-NEXT:    vwmulsu.vx v16, v8, a2
+; RV32-NEXT:    vwmulsu.vx v12, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
@@ -2295,9 +2245,9 @@ define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; RV32-NEXT:    vwmulsu.vx v16, v8, a2
+; RV32-NEXT:    vwmulsu.vx v12, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
@@ -2318,18 +2268,18 @@ define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, <
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 8
 ; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; RV32-NEXT:    vwmulu.vx v16, v8, a2
+; RV32-NEXT:    vwmulu.vx v12, v8, a2
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 8
 ; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; RV64-NEXT:    vwmulu.vx v16, v8, a2
+; RV64-NEXT:    vwmulu.vx v12, v8, a2
 ; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
@@ -2341,9 +2291,9 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8i32_nxv8f64(ptr %base, <vscal
 ; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v16, v8, 3
+; RV32-NEXT:    vsll.vi v12, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
@@ -2363,9 +2313,9 @@ define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, <
 ; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v16, v8, 3
+; RV32-NEXT:    vsll.vi v12, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
@@ -2386,9 +2336,9 @@ define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, <
 ; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vsll.vi v16, v8, 3
+; RV32-NEXT:    vsll.vi v12, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
@@ -2409,10 +2359,10 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8f64(ptr %base, <vscale x 8 x
 ; RV32-LABEL: vpgather_baseidx_nxv8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
-; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsll.vi v12, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv8f64:
@@ -2483,14 +2433,14 @@ define <vscale x 16 x double> @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, <vs
 ; RV32-NEXT:    li a3, 8
 ; RV32-NEXT:    csrr a2, vlenb
 ; RV32-NEXT:    vsetvli a4, zero, e16, m4, ta, ma
-; RV32-NEXT:    vwmulsu.vx v24, v8, a3
+; RV32-NEXT:    vwmulsu.vx v16, v8, a3
 ; RV32-NEXT:    mv a3, a1
 ; RV32-NEXT:    bltu a1, a2, .LBB112_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a3, a2
 ; RV32-NEXT:  .LBB112_2:
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    sub a3, a1, a2
 ; RV32-NEXT:    srli a2, a2, 3
 ; RV32-NEXT:    sltu a1, a1, a3
@@ -2499,7 +2449,7 @@ define <vscale x 16 x double> @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, <vs
 ; RV32-NEXT:    vslidedown.vx v0, v0, a2
 ; RV32-NEXT:    and a1, a1, a3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT:    vluxei32.v v16, (a0), v20, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
@@ -2540,14 +2490,14 @@ define <vscale x 16 x double> @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base
 ; RV32-NEXT:    li a3, 8
 ; RV32-NEXT:    csrr a2, vlenb
 ; RV32-NEXT:    vsetvli a4, zero, e16, m4, ta, ma
-; RV32-NEXT:    vwmulsu.vx v24, v8, a3
+; RV32-NEXT:    vwmulsu.vx v16, v8, a3
 ; RV32-NEXT:    mv a3, a1
 ; RV32-NEXT:    bltu a1, a2, .LBB113_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a3, a2
 ; RV32-NEXT:  .LBB113_2:
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    sub a3, a1, a2
 ; RV32-NEXT:    srli a2, a2, 3
 ; RV32-NEXT:    sltu a1, a1, a3
@@ -2556,7 +2506,7 @@ define <vscale x 16 x double> @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base
 ; RV32-NEXT:    vslidedown.vx v0, v0, a2
 ; RV32-NEXT:    and a1, a1, a3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT:    vluxei32.v v16, (a0), v20, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
@@ -2598,14 +2548,14 @@ define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base
 ; RV32-NEXT:    li a3, 8
 ; RV32-NEXT:    csrr a2, vlenb
 ; RV32-NEXT:    vsetvli a4, zero, e16, m4, ta, ma
-; RV32-NEXT:    vwmulu.vx v24, v8, a3
+; RV32-NEXT:    vwmulu.vx v16, v8, a3
 ; RV32-NEXT:    mv a3, a1
 ; RV32-NEXT:    bltu a1, a2, .LBB114_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a3, a2
 ; RV32-NEXT:  .LBB114_2:
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    sub a3, a1, a2
 ; RV32-NEXT:    srli a2, a2, 3
 ; RV32-NEXT:    sltu a1, a1, a3
@@ -2614,7 +2564,7 @@ define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base
 ; RV32-NEXT:    vslidedown.vx v0, v0, a2
 ; RV32-NEXT:    and a1, a1, a3
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vluxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT:    vluxei32.v v16, (a0), v20, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
@@ -2622,14 +2572,14 @@ define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base
 ; RV64-NEXT:    li a3, 8
 ; RV64-NEXT:    csrr a2, vlenb
 ; RV64-NEXT:    vsetvli a4, zero, e16, m4, ta, ma
-; RV64-NEXT:    vwmulu.vx v24, v8, a3
+; RV64-NEXT:    vwmulu.vx v16, v8, a3
 ; RV64-NEXT:    mv a3, a1
 ; RV64-NEXT:    bltu a1, a2, .LBB114_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a3, a2
 ; RV64-NEXT:  .LBB114_2:
 ; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT:    vluxei32.v v8, (a0), v24, v0.t
+; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    sub a3, a1, a2
 ; RV64-NEXT:    srli a2, a2, 3
 ; RV64-NEXT:    sltu a1, a1, a3
@@ -2638,7 +2588,7 @@ define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base
 ; RV64-NEXT:    vslidedown.vx v0, v0, a2
 ; RV64-NEXT:    and a1, a1, a3
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vluxei32.v v16, (a0), v28, v0.t
+; RV64-NEXT:    vluxei32.v v16, (a0), v20, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 6e0aee18c6c74..abbe98cacde26 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -51,8 +51,8 @@ define <vscale x 2 x i1> @vpmerge_nxv2i1(<vscale x 2 x i1> %va, <vscale x 2 x i1
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
 ; RV64-NEXT:    vid.v v10
-; RV64-NEXT:    vmsltu.vx v12, v10, a0
-; RV64-NEXT:    vmand.mm v9, v9, v12
+; RV64-NEXT:    vmsltu.vx v10, v10, a0
+; RV64-NEXT:    vmand.mm v9, v9, v10
 ; RV64-NEXT:    vmandn.mm v8, v8, v9
 ; RV64-NEXT:    vmand.mm v9, v0, v9
 ; RV64-NEXT:    vmor.mm v0, v9, v8
@@ -66,8 +66,8 @@ define <vscale x 4 x i1> @vpmerge_nxv4i1(<vscale x 4 x i1> %va, <vscale x 4 x i1
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vid.v v10
-; RV32-NEXT:    vmsltu.vx v12, v10, a0
-; RV32-NEXT:    vmand.mm v9, v9, v12
+; RV32-NEXT:    vmsltu.vx v10, v10, a0
+; RV32-NEXT:    vmand.mm v9, v9, v10
 ; RV32-NEXT:    vmandn.mm v8, v8, v9
 ; RV32-NEXT:    vmand.mm v9, v0, v9
 ; RV32-NEXT:    vmor.mm v0, v9, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
index cf8c06fb91089..9d38dbac2f5c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
@@ -90,9 +90,9 @@ define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscal
 ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v11, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
@@ -101,9 +101,9 @@ define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscal
 ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
@@ -245,18 +245,18 @@ define void @vpscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vsca
 ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vnsrl.wi v8, v11, 0
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
@@ -544,15 +544,15 @@ define void @vpscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vsca
 ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
-; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
+; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
-; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
+; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
   call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
@@ -1069,8 +1069,8 @@ define void @vpscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vsca
 ; RV32-LABEL: vpscatter_baseidx_nxv8i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
-; RV32-NEXT:    vsll.vi v16, v24, 3
+; RV32-NEXT:    vnsrl.wi v16, v16, 0
+; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1988,8 +1988,8 @@ define void @vpscatter_baseidx_nxv6f64(<vscale x 6 x double> %val, ptr %base, <v
 ; RV32-LABEL: vpscatter_baseidx_nxv6f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
-; RV32-NEXT:    vsll.vi v16, v24, 3
+; RV32-NEXT:    vnsrl.wi v16, v16, 0
+; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2233,8 +2233,8 @@ define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <v
 ; RV32-LABEL: vpscatter_baseidx_nxv8f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
-; RV32-NEXT:    vsll.vi v16, v24, 3
+; RV32-NEXT:    vnsrl.wi v16, v16, 0
+; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2322,11 +2322,11 @@ define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x ptr>
 define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vl4re16.v v4, (a1)
+; RV32-NEXT:    vl4re16.v v28, (a1)
 ; RV32-NEXT:    li a3, 8
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    vsetvli a4, zero, e16, m4, ta, ma
-; RV32-NEXT:    vwmulsu.vx v24, v4, a3
+; RV32-NEXT:    vwmulsu.vx v24, v28, a3
 ; RV32-NEXT:    mv a3, a2
 ; RV32-NEXT:    bltu a2, a1, .LBB109_2
 ; RV32-NEXT:  # %bb.1:
@@ -2395,11 +2395,11 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, pt
 define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vl4re16.v v4, (a1)
+; RV32-NEXT:    vl4re16.v v28, (a1)
 ; RV32-NEXT:    li a3, 8
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    vsetvli a4, zero, e16, m4, ta, ma
-; RV32-NEXT:    vwmulsu.vx v24, v4, a3
+; RV32-NEXT:    vwmulsu.vx v24, v28, a3
 ; RV32-NEXT:    mv a3, a2
 ; RV32-NEXT:    bltu a2, a1, .LBB110_2
 ; RV32-NEXT:  # %bb.1:
@@ -2469,11 +2469,11 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
 define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vl4re16.v v4, (a1)
+; RV32-NEXT:    vl4re16.v v28, (a1)
 ; RV32-NEXT:    li a3, 8
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    vsetvli a4, zero, e16, m4, ta, ma
-; RV32-NEXT:    vwmulu.vx v24, v4, a3
+; RV32-NEXT:    vwmulu.vx v24, v28, a3
 ; RV32-NEXT:    mv a3, a2
 ; RV32-NEXT:    bltu a2, a1, .LBB111_2
 ; RV32-NEXT:  # %bb.1:
@@ -2494,11 +2494,11 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vl4re16.v v4, (a1)
+; RV64-NEXT:    vl4re16.v v28, (a1)
 ; RV64-NEXT:    li a3, 8
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    vsetvli a4, zero, e16, m4, ta, ma
-; RV64-NEXT:    vwmulu.vx v24, v4, a3
+; RV64-NEXT:    vwmulu.vx v24, v28, a3
 ; RV64-NEXT:    mv a3, a2
 ; RV64-NEXT:    bltu a2, a1, .LBB111_2
 ; RV64-NEXT:  # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll
index 25a226e60e715..9a87d09b7ba12 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll
@@ -1008,8 +1008,8 @@ define <vscale x 2 x i64> @vrol_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b) {
 ; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
 ; CHECK-RV32-NEXT:    vmv.v.x v10, a0
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-RV32-NEXT:    vmv.v.i v14, 0
-; CHECK-RV32-NEXT:    vwsub.vx v12, v14, a0
+; CHECK-RV32-NEXT:    vmv.v.i v13, 0
+; CHECK-RV32-NEXT:    vwsub.vx v12, v13, a0
 ; CHECK-RV32-NEXT:    li a0, 63
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-RV32-NEXT:    vand.vx v10, v10, a0
@@ -1071,8 +1071,8 @@ define <vscale x 4 x i64> @vrol_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b) {
 ; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
 ; CHECK-RV32-NEXT:    vmv.v.x v12, a0
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-RV32-NEXT:    vmv.v.i v20, 0
-; CHECK-RV32-NEXT:    vwsub.vx v16, v20, a0
+; CHECK-RV32-NEXT:    vmv.v.i v18, 0
+; CHECK-RV32-NEXT:    vwsub.vx v16, v18, a0
 ; CHECK-RV32-NEXT:    li a0, 63
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-RV32-NEXT:    vand.vx v12, v12, a0
@@ -1134,8 +1134,8 @@ define <vscale x 8 x i64> @vrol_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b) {
 ; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-RV32-NEXT:    vmv.v.x v16, a0
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-RV32-NEXT:    vmv.v.i v4, 0
-; CHECK-RV32-NEXT:    vwsub.vx v24, v4, a0
+; CHECK-RV32-NEXT:    vmv.v.i v28, 0
+; CHECK-RV32-NEXT:    vwsub.vx v24, v28, a0
 ; CHECK-RV32-NEXT:    li a0, 63
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-RV32-NEXT:    vand.vx v16, v16, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
index 9e63b613ab70b..8985a51d45ddd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
@@ -1714,8 +1714,8 @@ define <vscale x 2 x i64> @vror_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b) {
 ; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
 ; CHECK-RV32-NEXT:    vmv.v.x v10, a0
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-RV32-NEXT:    vmv.v.i v14, 0
-; CHECK-RV32-NEXT:    vwsub.vx v12, v14, a0
+; CHECK-RV32-NEXT:    vmv.v.i v13, 0
+; CHECK-RV32-NEXT:    vwsub.vx v12, v13, a0
 ; CHECK-RV32-NEXT:    li a0, 63
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-RV32-NEXT:    vand.vx v10, v10, a0
@@ -1816,8 +1816,8 @@ define <vscale x 4 x i64> @vror_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b) {
 ; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
 ; CHECK-RV32-NEXT:    vmv.v.x v12, a0
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-RV32-NEXT:    vmv.v.i v20, 0
-; CHECK-RV32-NEXT:    vwsub.vx v16, v20, a0
+; CHECK-RV32-NEXT:    vmv.v.i v18, 0
+; CHECK-RV32-NEXT:    vwsub.vx v16, v18, a0
 ; CHECK-RV32-NEXT:    li a0, 63
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-RV32-NEXT:    vand.vx v12, v12, a0
@@ -1918,8 +1918,8 @@ define <vscale x 8 x i64> @vror_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b) {
 ; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-RV32-NEXT:    vmv.v.x v16, a0
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-RV32-NEXT:    vmv.v.i v4, 0
-; CHECK-RV32-NEXT:    vwsub.vx v24, v4, a0
+; CHECK-RV32-NEXT:    vmv.v.i v28, 0
+; CHECK-RV32-NEXT:    vwsub.vx v24, v28, a0
 ; CHECK-RV32-NEXT:    li a0, 63
 ; CHECK-RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-RV32-NEXT:    vand.vx v16, v16, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index 206838917d004..dda6a21cf5e82 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -107,13 +107,13 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i16i32_multiple_users(ptr %x, ptr %y
 define <vscale x 2 x i64> @vwop_vscale_sext_i32i64_multiple_users(ptr %x, ptr %y, ptr %z) {
 ; NO_FOLDING-LABEL: vwop_vscale_sext_i32i64_multiple_users:
 ; NO_FOLDING:       # %bb.0:
-; NO_FOLDING-NEXT:    vl1re32.v v10, (a0)
-; NO_FOLDING-NEXT:    vl1re32.v v12, (a1)
-; NO_FOLDING-NEXT:    vl1re32.v v14, (a2)
+; NO_FOLDING-NEXT:    vl1re32.v v9, (a0)
+; NO_FOLDING-NEXT:    vl1re32.v v11, (a1)
+; NO_FOLDING-NEXT:    vl1re32.v v13, (a2)
 ; NO_FOLDING-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; NO_FOLDING-NEXT:    vsext.vf2 v8, v10
-; NO_FOLDING-NEXT:    vsext.vf2 v10, v12
-; NO_FOLDING-NEXT:    vsext.vf2 v12, v14
+; NO_FOLDING-NEXT:    vsext.vf2 v8, v9
+; NO_FOLDING-NEXT:    vsext.vf2 v10, v11
+; NO_FOLDING-NEXT:    vsext.vf2 v12, v13
 ; NO_FOLDING-NEXT:    vmul.vv v10, v8, v10
 ; NO_FOLDING-NEXT:    vadd.vv v14, v8, v12
 ; NO_FOLDING-NEXT:    vsub.vv v8, v8, v12
@@ -123,13 +123,13 @@ define <vscale x 2 x i64> @vwop_vscale_sext_i32i64_multiple_users(ptr %x, ptr %y
 ;
 ; FOLDING-LABEL: vwop_vscale_sext_i32i64_multiple_users:
 ; FOLDING:       # %bb.0:
-; FOLDING-NEXT:    vl1re32.v v14, (a0)
-; FOLDING-NEXT:    vl1re32.v v10, (a1)
-; FOLDING-NEXT:    vl1re32.v v15, (a2)
+; FOLDING-NEXT:    vl1re32.v v13, (a0)
+; FOLDING-NEXT:    vl1re32.v v9, (a1)
+; FOLDING-NEXT:    vl1re32.v v14, (a2)
 ; FOLDING-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; FOLDING-NEXT:    vwmul.vv v8, v14, v10
-; FOLDING-NEXT:    vwadd.vv v10, v14, v15
-; FOLDING-NEXT:    vwsub.vv v12, v14, v15
+; FOLDING-NEXT:    vwmul.vv v8, v13, v9
+; FOLDING-NEXT:    vwadd.vv v10, v13, v14
+; FOLDING-NEXT:    vwsub.vv v12, v13, v14
 ; FOLDING-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; FOLDING-NEXT:    vor.vv v8, v8, v10
 ; FOLDING-NEXT:    vor.vv v8, v8, v12
@@ -398,13 +398,13 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i16i32_multiple_users(ptr %x, ptr %y
 define <vscale x 2 x i64> @vwop_vscale_zext_i32i64_multiple_users(ptr %x, ptr %y, ptr %z) {
 ; NO_FOLDING-LABEL: vwop_vscale_zext_i32i64_multiple_users:
 ; NO_FOLDING:       # %bb.0:
-; NO_FOLDING-NEXT:    vl1re32.v v10, (a0)
-; NO_FOLDING-NEXT:    vl1re32.v v12, (a1)
-; NO_FOLDING-NEXT:    vl1re32.v v14, (a2)
+; NO_FOLDING-NEXT:    vl1re32.v v9, (a0)
+; NO_FOLDING-NEXT:    vl1re32.v v11, (a1)
+; NO_FOLDING-NEXT:    vl1re32.v v13, (a2)
 ; NO_FOLDING-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; NO_FOLDING-NEXT:    vzext.vf2 v8, v10
-; NO_FOLDING-NEXT:    vzext.vf2 v10, v12
-; NO_FOLDING-NEXT:    vzext.vf2 v12, v14
+; NO_FOLDING-NEXT:    vzext.vf2 v8, v9
+; NO_FOLDING-NEXT:    vzext.vf2 v10, v11
+; NO_FOLDING-NEXT:    vzext.vf2 v12, v13
 ; NO_FOLDING-NEXT:    vmul.vv v10, v8, v10
 ; NO_FOLDING-NEXT:    vadd.vv v14, v8, v12
 ; NO_FOLDING-NEXT:    vsub.vv v8, v8, v12
@@ -414,13 +414,13 @@ define <vscale x 2 x i64> @vwop_vscale_zext_i32i64_multiple_users(ptr %x, ptr %y
 ;
 ; FOLDING-LABEL: vwop_vscale_zext_i32i64_multiple_users:
 ; FOLDING:       # %bb.0:
-; FOLDING-NEXT:    vl1re32.v v14, (a0)
-; FOLDING-NEXT:    vl1re32.v v10, (a1)
-; FOLDING-NEXT:    vl1re32.v v15, (a2)
+; FOLDING-NEXT:    vl1re32.v v13, (a0)
+; FOLDING-NEXT:    vl1re32.v v9, (a1)
+; FOLDING-NEXT:    vl1re32.v v14, (a2)
 ; FOLDING-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; FOLDING-NEXT:    vwmulu.vv v8, v14, v10
-; FOLDING-NEXT:    vwaddu.vv v10, v14, v15
-; FOLDING-NEXT:    vwsubu.vv v12, v14, v15
+; FOLDING-NEXT:    vwmulu.vv v8, v13, v9
+; FOLDING-NEXT:    vwaddu.vv v10, v13, v14
+; FOLDING-NEXT:    vwsubu.vv v12, v13, v14
 ; FOLDING-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; FOLDING-NEXT:    vor.vv v8, v8, v10
 ; FOLDING-NEXT:    vor.vv v8, v8, v12
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll
index eec2a5f3efcfb..5bde13d3fa566 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll
@@ -128,8 +128,8 @@ define <vscale x 2 x i64> @vsext_nxv2i32_nxv2i64(<vscale x 2 x i32> %a, <vscale
 ; CHECK-LABEL: vsext_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsext.vf2 v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x i64> %v
@@ -139,8 +139,8 @@ define <vscale x 2 x i64> @vsext_nxv2i32_nxv2i64_unmasked(<vscale x 2 x i32> %a,
 ; CHECK-LABEL: vsext_nxv2i32_nxv2i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
   ret <vscale x 2 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext.ll b/llvm/test/CodeGen/RISCV/rvv/vsext.ll
index bb83dbc98401a..dc2903f25429e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsext.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsext.ll
@@ -145,8 +145,8 @@ define <vscale x 8 x i64> @intrinsic_vsext_vf8_nxv8i64(<vscale x 8 x i8> %0, iXL
 ; CHECK-LABEL: intrinsic_vsext_vf8_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v8
-; CHECK-NEXT:    vsext.vf8 v8, v16
+; CHECK-NEXT:    vmv1r.v v15, v8
+; CHECK-NEXT:    vsext.vf8 v8, v15
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vsext.nxv8i64.nxv8i8(
@@ -277,8 +277,8 @@ define <vscale x 4 x i64> @intrinsic_vsext_vf4_nxv4i64(<vscale x 4 x i16> %0, iX
 ; CHECK-LABEL: intrinsic_vsext_vf4_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vsext.vf4 v8, v11
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i64.nxv4i16(
@@ -321,8 +321,8 @@ define <vscale x 8 x i64> @intrinsic_vsext_vf4_nxv8i64(<vscale x 8 x i16> %0, iX
 ; CHECK-LABEL: intrinsic_vsext_vf4_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vsext.vf4 v8, v14
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vsext.nxv8i64.nxv8i16(
@@ -497,8 +497,8 @@ define <vscale x 8 x i32> @intrinsic_vsext_vf4_nxv8i32(<vscale x 8 x i8> %0, iXL
 ; CHECK-LABEL: intrinsic_vsext_vf4_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vsext.vf4 v8, v11
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vsext.nxv8i32.nxv8i8(
@@ -541,8 +541,8 @@ define <vscale x 16 x i32> @intrinsic_vsext_vf4_nxv16i32(<vscale x 16 x i8> %0,
 ; CHECK-LABEL: intrinsic_vsext_vf4_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vsext.vf4 v8, v14
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vsext.nxv16i32.nxv16i8(
@@ -629,8 +629,8 @@ define <vscale x 2 x i64> @intrinsic_vsext_vf2_nxv2i64(<vscale x 2 x i32> %0, iX
 ; CHECK-LABEL: intrinsic_vsext_vf2_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vsext.nxv2i64.nxv2i32(
@@ -673,8 +673,8 @@ define <vscale x 4 x i64> @intrinsic_vsext_vf2_nxv4i64(<vscale x 4 x i32> %0, iX
 ; CHECK-LABEL: intrinsic_vsext_vf2_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i64.nxv4i32(
@@ -717,8 +717,8 @@ define <vscale x 8 x i64> @intrinsic_vsext_vf2_nxv8i64(<vscale x 8 x i32> %0, iX
 ; CHECK-LABEL: intrinsic_vsext_vf2_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vsext.vf2 v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vsext.nxv8i64.nxv8i32(
@@ -849,8 +849,8 @@ define <vscale x 4 x i32> @intrinsic_vsext_vf2_nxv4i32(<vscale x 4 x i16> %0, iX
 ; CHECK-LABEL: intrinsic_vsext_vf2_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i16(
@@ -893,8 +893,8 @@ define <vscale x 8 x i32> @intrinsic_vsext_vf2_nxv8i32(<vscale x 8 x i16> %0, iX
 ; CHECK-LABEL: intrinsic_vsext_vf2_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vsext.nxv8i32.nxv8i16(
@@ -937,8 +937,8 @@ define <vscale x 16 x i32> @intrinsic_vsext_vf2_nxv16i32(<vscale x 16 x i16> %0,
 ; CHECK-LABEL: intrinsic_vsext_vf2_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vsext.vf2 v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vsext.nxv16i32.nxv16i16(
@@ -1113,8 +1113,8 @@ define <vscale x 8 x i16> @intrinsic_vsext_vf2_nxv8i16(<vscale x 8 x i8> %0, iXL
 ; CHECK-LABEL: intrinsic_vsext_vf2_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vsext.nxv8i16.nxv8i8(
@@ -1157,8 +1157,8 @@ define <vscale x 16 x i16> @intrinsic_vsext_vf2_nxv16i16(<vscale x 16 x i8> %0,
 ; CHECK-LABEL: intrinsic_vsext_vf2_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vsext.nxv16i16.nxv16i8(
@@ -1201,8 +1201,8 @@ define <vscale x 32 x i16> @intrinsic_vsext_vf2_nxv32i16(<vscale x 32 x i8> %0,
 ; CHECK-LABEL: intrinsic_vsext_vf2_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vsext.vf2 v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vsext.nxv32i16.nxv32i8(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp-mask.ll
index ec16e58f6e57d..78f460805db96 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp-mask.ll
@@ -9,9 +9,9 @@ define <vscale x 2 x bfloat> @vsitofp_nxv2bf16_nxv2i1(<vscale x 2 x i1> %va, <vs
 ; CHECK-NEXT:    vmv.v.i v9, 0
 ; CHECK-NEXT:    vmerge.vim v9, v9, -1, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.sitofp.nxv2bf16.nxv2i1(<vscale x 2 x i1> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -23,9 +23,9 @@ define <vscale x 2 x bfloat> @vsitofp_nxv2bf16_nxv2i1_unmasked(<vscale x 2 x i1>
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
-; CHECK-NEXT:    vfcvt.f.x.v v9, v8
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.sitofp.nxv2bf16.nxv2i1(<vscale x 2 x i1> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll
index f69ae3d560ef7..a6d4542f7d525 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll
@@ -12,9 +12,9 @@ define <vscale x 2 x bfloat> @vsitofp_nxv2bf16_nxv2i7(<vscale x 2 x i7> %va, <vs
 ; CHECK-NEXT:    vsra.vi v8, v8, 1
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vsext.vf2 v9, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.x.v v10, v9, v0.t
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.sitofp.nxv2bf16.nxv2i7(<vscale x 2 x i7> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -27,9 +27,9 @@ define <vscale x 2 x bfloat> @vsitofp_nxv2bf16_nxv2i8(<vscale x 2 x i8> %va, <vs
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vsext.vf2 v9, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.x.v v10, v9, v0.t
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.sitofp.nxv2bf16.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -40,9 +40,9 @@ define <vscale x 2 x bfloat> @vsitofp_nxv2bf16_nxv2i8_unmasked(<vscale x 2 x i8>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vsext.vf2 v9, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v10, v9
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.sitofp.nxv2bf16.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -80,9 +80,9 @@ define <vscale x 2 x bfloat> @vsitofp_nxv2bf16_nxv2i32(<vscale x 2 x i32> %va, <
 ; CHECK-LABEL: vsitofp_nxv2bf16_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfcvt.f.x.v v9, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.sitofp.nxv2bf16.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -92,9 +92,9 @@ define <vscale x 2 x bfloat> @vsitofp_nxv2bf16_nxv2i32_unmasked(<vscale x 2 x i3
 ; CHECK-LABEL: vsitofp_nxv2bf16_nxv2i32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfcvt.f.x.v v9, v8
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.sitofp.nxv2bf16.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -106,9 +106,9 @@ define <vscale x 2 x bfloat> @vsitofp_nxv2bf16_nxv2i64(<vscale x 2 x i64> %va, <
 ; CHECK-LABEL: vsitofp_nxv2bf16_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8, v0.t
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.sitofp.nxv2bf16.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -118,9 +118,9 @@ define <vscale x 2 x bfloat> @vsitofp_nxv2bf16_nxv2i64_unmasked(<vscale x 2 x i6
 ; CHECK-LABEL: vsitofp_nxv2bf16_nxv2i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.sitofp.nxv2bf16.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -144,9 +144,9 @@ define <vscale x 2 x half> @vsitofp_nxv2f16_nxv2i7(<vscale x 2 x i7> %va, <vscal
 ; ZVFHMIN-NEXT:    vsra.vi v8, v8, 1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v9, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i7(<vscale x 2 x i7> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -166,9 +166,9 @@ define <vscale x 2 x half> @vsitofp_nxv2f16_nxv2i8(<vscale x 2 x i8> %va, <vscal
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v9, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -186,9 +186,9 @@ define <vscale x 2 x half> @vsitofp_nxv2f16_nxv2i8_unmasked(<vscale x 2 x i8> %v
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vsext.vf2 v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -238,16 +238,15 @@ define <vscale x 2 x half> @vsitofp_nxv2f16_nxv2i32(<vscale x 2 x i32> %va, <vsc
 ; ZVFH-LABEL: vsitofp_nxv2f16_nxv2i32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v9, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv2f16_nxv2i32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v9, v8, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -257,16 +256,15 @@ define <vscale x 2 x half> @vsitofp_nxv2f16_nxv2i32_unmasked(<vscale x 2 x i32>
 ; ZVFH-LABEL: vsitofp_nxv2f16_nxv2i32_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv2f16_nxv2i32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v9, v8
+; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -278,17 +276,17 @@ define <vscale x 2 x half> @vsitofp_nxv2f16_nxv2i64(<vscale x 2 x i64> %va, <vsc
 ; ZVFH-LABEL: vsitofp_nxv2f16_nxv2i64:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v10, v8, v0.t
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFH-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv2f16_nxv2i64:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.x.w v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.x.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -298,17 +296,17 @@ define <vscale x 2 x half> @vsitofp_nxv2f16_nxv2i64_unmasked(<vscale x 2 x i64>
 ; ZVFH-LABEL: vsitofp_nxv2f16_nxv2i64_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.f.x.w v10, v8
+; ZVFH-NEXT:    vfncvt.f.x.w v8, v8
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv2f16_nxv2i64_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.x.w v10, v8
+; ZVFHMIN-NEXT:    vfncvt.f.x.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -390,8 +388,7 @@ define <vscale x 2 x float> @vsitofp_nxv2f32_nxv2i64(<vscale x 2 x i64> %va, <vs
 ; CHECK-LABEL: vsitofp_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x float> %v
@@ -401,8 +398,7 @@ define <vscale x 2 x float> @vsitofp_nxv2f32_nxv2i64_unmasked(<vscale x 2 x i64>
 ; CHECK-LABEL: vsitofp_nxv2f32_nxv2i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.x.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x float> %v
@@ -414,8 +410,8 @@ define <vscale x 2 x double> @vsitofp_nxv2f64_nxv2i8(<vscale x 2 x i8> %va, <vsc
 ; CHECK-LABEL: vsitofp_nxv2f64_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10, v0.t
+; CHECK-NEXT:    vsext.vf4 v9, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -425,8 +421,8 @@ define <vscale x 2 x double> @vsitofp_nxv2f64_nxv2i8_unmasked(<vscale x 2 x i8>
 ; CHECK-LABEL: vsitofp_nxv2f64_nxv2i8_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf4 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x double> %v
@@ -438,8 +434,8 @@ define <vscale x 2 x double> @vsitofp_nxv2f64_nxv2i16(<vscale x 2 x i16> %va, <v
 ; CHECK-LABEL: vsitofp_nxv2f64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10, v0.t
+; CHECK-NEXT:    vsext.vf2 v9, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -449,8 +445,8 @@ define <vscale x 2 x double> @vsitofp_nxv2f64_nxv2i16_unmasked(<vscale x 2 x i16
 ; CHECK-LABEL: vsitofp_nxv2f64_nxv2i16_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vsext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x double> %v
@@ -462,8 +458,8 @@ define <vscale x 2 x double> @vsitofp_nxv2f64_nxv2i32(<vscale x 2 x i32> %va, <v
 ; CHECK-LABEL: vsitofp_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -473,8 +469,8 @@ define <vscale x 2 x double> @vsitofp_nxv2f64_nxv2i32_unmasked(<vscale x 2 x i32
 ; CHECK-LABEL: vsitofp_nxv2f64_nxv2i32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
index ad8097631acd3..cbb7cdec8d6ed 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
@@ -56,9 +56,8 @@ define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i64(<vscale x 2 x i64> %a, <vscale x
 ; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vand.vi v10, v8, 1, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x i1> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
index 1c687ef23bfa8..35fe29f5d014f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
@@ -80,9 +80,9 @@ define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i64(<vscale x 2 x i64> %a, <vscale x
 ; CHECK-LABEL: vtrunc_nxv2i8_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -94,9 +94,9 @@ define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i64_unmasked(<vscale x 2 x i64> %a,
 ; CHECK-LABEL: vtrunc_nxv2i8_nxv2i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -132,9 +132,9 @@ define <vscale x 2 x i16> @vtrunc_nxv2i16_nxv2i64(<vscale x 2 x i64> %a, <vscale
 ; CHECK-LABEL: vtrunc_nxv2i16_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x i16> %v
@@ -144,9 +144,9 @@ define <vscale x 2 x i16> @vtrunc_nxv2i16_nxv2i64_unmasked(<vscale x 2 x i64> %a
 ; CHECK-LABEL: vtrunc_nxv2i16_nxv2i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
   ret <vscale x 2 x i16> %v
@@ -176,9 +176,9 @@ define <vscale x 15 x i16> @vtrunc_nxv15i16_nxv15i64(<vscale x 15 x i64> %a, <vs
 ; CHECK-NEXT:  .LBB12_2:
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v20, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v20, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
 ; CHECK-NEXT:    vmv4r.v v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 15 x i16> @llvm.vp.trunc.nxv15i16.nxv15i64(<vscale x 15 x i64> %a, <vscale x 15 x i1> %m, i32 %vl)
@@ -191,8 +191,7 @@ define <vscale x 2 x i32> @vtrunc_nxv2i32_nxv2i64(<vscale x 2 x i64> %a, <vscale
 ; CHECK-LABEL: vtrunc_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x i32> %v
@@ -202,8 +201,7 @@ define <vscale x 2 x i32> @vtrunc_nxv2i32_nxv2i64_unmasked(<vscale x 2 x i64> %a
 ; CHECK-LABEL: vtrunc_nxv2i32_nxv2i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
   ret <vscale x 2 x i32> %v
@@ -234,9 +232,9 @@ define <vscale x 32 x i7> @vtrunc_nxv32i7_nxv32i32(<vscale x 32 x i32> %a, <vsca
 ; CHECK-NEXT:  .LBB15_2:
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v20, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v20, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
 ; CHECK-NEXT:    vmv4r.v v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x i7> @llvm.vp.trunc.nxv32i7.nxv32i32(<vscale x 32 x i32> %a, <vscale x 32 x i1> %m, i32 %vl)
@@ -268,9 +266,9 @@ define <vscale x 32 x i8> @vtrunc_nxv32i8_nxv32i32(<vscale x 32 x i32> %a, <vsca
 ; CHECK-NEXT:  .LBB16_2:
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v20, v8, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v20, 0, v0.t
+; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
 ; CHECK-NEXT:    vmv4r.v v8, v16
 ; CHECK-NEXT:    ret
   %v = call <vscale x 32 x i8> @llvm.vp.trunc.nxv32i8.nxv32i32(<vscale x 32 x i32> %a, <vscale x 32 x i1> %m, i32 %vl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll
index e86d6045df297..d4a1c12e31f6a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll
@@ -36,8 +36,7 @@ define <vscale x 8 x i8> @vtrunc_nxv8i16_nxv8i8(<vscale x 8 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 8 x i16> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %tvec
@@ -47,8 +46,7 @@ define <vscale x 16 x i8> @vtrunc_nxv16i16_nxv16i8(<vscale x 16 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 16 x i16> %va to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %tvec
@@ -102,9 +100,9 @@ define <vscale x 4 x i8> @vtrunc_nxv4i32_nxv4i8(<vscale x 4 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i32_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 4 x i32> %va to <vscale x 4 x i8>
   ret <vscale x 4 x i8> %tvec
@@ -114,8 +112,7 @@ define <vscale x 4 x i16> @vtrunc_nxv4i32_nxv4i16(<vscale x 4 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 4 x i32> %va to <vscale x 4 x i16>
   ret <vscale x 4 x i16> %tvec
@@ -125,9 +122,9 @@ define <vscale x 8 x i8> @vtrunc_nxv8i32_nxv8i8(<vscale x 8 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv8i32_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 8 x i32> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %tvec
@@ -137,8 +134,7 @@ define <vscale x 8 x i16> @vtrunc_nxv8i32_nxv8i16(<vscale x 8 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 8 x i32> %va to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %tvec
@@ -148,9 +144,9 @@ define <vscale x 16 x i8> @vtrunc_nxv16i32_nxv16i8(<vscale x 16 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv16i32_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 16 x i32> %va to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %tvec
@@ -160,8 +156,7 @@ define <vscale x 16 x i16> @vtrunc_nxv16i32_nxv16i16(<vscale x 16 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 16 x i32> %va to <vscale x 16 x i16>
   ret <vscale x 16 x i16> %tvec
@@ -207,9 +202,9 @@ define <vscale x 2 x i8> @vtrunc_nxv2i64_nxv2i8(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -221,9 +216,9 @@ define <vscale x 2 x i16> @vtrunc_nxv2i64_nxv2i16(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i64> %va to <vscale x 2 x i16>
   ret <vscale x 2 x i16> %tvec
@@ -233,8 +228,7 @@ define <vscale x 2 x i32> @vtrunc_nxv2i64_nxv2i32(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i64> %va to <vscale x 2 x i32>
   ret <vscale x 2 x i32> %tvec
@@ -244,9 +238,9 @@ define <vscale x 4 x i8> @vtrunc_nxv4i64_nxv4i8(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
@@ -258,9 +252,9 @@ define <vscale x 4 x i16> @vtrunc_nxv4i64_nxv4i16(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 4 x i64> %va to <vscale x 4 x i16>
   ret <vscale x 4 x i16> %tvec
@@ -270,8 +264,7 @@ define <vscale x 4 x i32> @vtrunc_nxv4i64_nxv4i32(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 4 x i64> %va to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %tvec
@@ -281,11 +274,11 @@ define <vscale x 8 x i8> @vtrunc_nxv8i64_nxv8i8(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 8 x i64> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %tvec
@@ -295,9 +288,9 @@ define <vscale x 8 x i16> @vtrunc_nxv8i64_nxv8i16(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 8 x i64> %va to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %tvec
@@ -307,8 +300,7 @@ define <vscale x 8 x i32> @vtrunc_nxv8i64_nxv8i32(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 8 x i64> %va to <vscale x 8 x i32>
   ret <vscale x 8 x i32> %tvec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp-mask.ll
index 616dc697b2847..d5d9c71bf5a8a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp-mask.ll
@@ -9,9 +9,9 @@ define <vscale x 2 x bfloat> @vuitofp_nxv2bf16_nxv2i1(<vscale x 2 x i1> %va, <vs
 ; CHECK-NEXT:    vmv.v.i v9, 0
 ; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfcvt.f.xu.v v9, v9, v0.t
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.uitofp.nxv2bf16.nxv2i1(<vscale x 2 x i1> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -23,9 +23,9 @@ define <vscale x 2 x bfloat> @vuitofp_nxv2bf16_nxv2i1_unmasked(<vscale x 2 x i1>
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vfcvt.f.xu.v v9, v8
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.uitofp.nxv2bf16.nxv2i1(<vscale x 2 x i1> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll
index e1edaaadadf1d..c48bd675e8fb5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll
@@ -12,9 +12,9 @@ define <vscale x 2 x bfloat> @vuitofp_nxv2bf16_nxv2i7(<vscale x 2 x i7> %va, <vs
 ; CHECK-NEXT:    vand.vx v8, v8, a1
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v9, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v10, v9, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.uitofp.nxv2bf16.nxv2i7(<vscale x 2 x i7> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -25,9 +25,9 @@ define <vscale x 2 x bfloat> @vuitofp_nxv2bf16_nxv2i8(<vscale x 2 x i8> %va, <vs
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v9, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v10, v9, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.uitofp.nxv2bf16.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -38,9 +38,9 @@ define <vscale x 2 x bfloat> @vuitofp_nxv2bf16_nxv2i8_unmasked(<vscale x 2 x i8>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v9, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v10, v9
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.uitofp.nxv2bf16.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -74,9 +74,9 @@ define <vscale x 2 x bfloat> @vuitofp_nxv2bf16_nxv2i32(<vscale x 2 x i32> %va, <
 ; CHECK-LABEL: vuitofp_nxv2bf16_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.uitofp.nxv2bf16.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -86,9 +86,9 @@ define <vscale x 2 x bfloat> @vuitofp_nxv2bf16_nxv2i32_unmasked(<vscale x 2 x i3
 ; CHECK-LABEL: vuitofp_nxv2bf16_nxv2i32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfcvt.f.xu.v v9, v8
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.uitofp.nxv2bf16.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -98,9 +98,9 @@ define <vscale x 2 x bfloat> @vuitofp_nxv2bf16_nxv2i64(<vscale x 2 x i64> %va, <
 ; CHECK-LABEL: vuitofp_nxv2bf16_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8, v0.t
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.uitofp.nxv2bf16.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -110,9 +110,9 @@ define <vscale x 2 x bfloat> @vuitofp_nxv2bf16_nxv2i64_unmasked(<vscale x 2 x i6
 ; CHECK-LABEL: vuitofp_nxv2bf16_nxv2i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.vp.uitofp.nxv2bf16.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x bfloat> %v
@@ -136,9 +136,9 @@ define <vscale x 2 x half> @vuitofp_nxv2f16_nxv2i7(<vscale x 2 x i7> %va, <vscal
 ; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v9, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.uitofp.nxv2f16.nxv2i7(<vscale x 2 x i7> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -158,9 +158,9 @@ define <vscale x 2 x half> @vuitofp_nxv2f16_nxv2i8(<vscale x 2 x i8> %va, <vscal
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v9, v8, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v9, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.uitofp.nxv2f16.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -178,9 +178,9 @@ define <vscale x 2 x half> @vuitofp_nxv2f16_nxv2i8_unmasked(<vscale x 2 x i8> %v
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vzext.vf2 v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.uitofp.nxv2f16.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -230,16 +230,15 @@ define <vscale x 2 x half> @vuitofp_nxv2f16_nxv2i32(<vscale x 2 x i32> %va, <vsc
 ; ZVFH-LABEL: vuitofp_nxv2f16_nxv2i32:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v9, v8, v0.t
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv2f16_nxv2i32:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.xu.v v9, v8, v0.t
+; ZVFHMIN-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.uitofp.nxv2f16.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -249,16 +248,15 @@ define <vscale x 2 x half> @vuitofp_nxv2f16_nxv2i32_unmasked(<vscale x 2 x i32>
 ; ZVFH-LABEL: vuitofp_nxv2f16_nxv2i32_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v9, v8
-; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv2f16_nxv2i32_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfcvt.f.xu.v v9, v8
+; ZVFHMIN-NEXT:    vfcvt.f.xu.v v8, v8
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.uitofp.nxv2f16.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -270,17 +268,17 @@ define <vscale x 2 x half> @vuitofp_nxv2f16_nxv2i64(<vscale x 2 x i64> %va, <vsc
 ; ZVFH-LABEL: vuitofp_nxv2f16_nxv2i64:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v10, v8, v0.t
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8, v0.t
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v8, v10, v0.t
+; ZVFH-NEXT:    vfncvt.f.f.w v8, v8, v0.t
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv2f16_nxv2i64:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.xu.w v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfncvt.f.xu.w v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.uitofp.nxv2f16.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
@@ -290,17 +288,17 @@ define <vscale x 2 x half> @vuitofp_nxv2f16_nxv2i64_unmasked(<vscale x 2 x i64>
 ; ZVFH-LABEL: vuitofp_nxv2f16_nxv2i64_unmasked:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFH-NEXT:    vfncvt.f.xu.w v10, v8
+; ZVFH-NEXT:    vfncvt.f.xu.w v8, v8
 ; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFH-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv2f16_nxv2i64_unmasked:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.xu.w v10, v8
+; ZVFHMIN-NEXT:    vfncvt.f.xu.w v8, v8
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v8
 ; ZVFHMIN-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.vp.uitofp.nxv2f16.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x half> %v
@@ -382,8 +380,7 @@ define <vscale x 2 x float> @vuitofp_nxv2f32_nxv2i64(<vscale x 2 x i64> %va, <vs
 ; CHECK-LABEL: vuitofp_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x float> %v
@@ -393,8 +390,7 @@ define <vscale x 2 x float> @vuitofp_nxv2f32_nxv2i64_unmasked(<vscale x 2 x i64>
 ; CHECK-LABEL: vuitofp_nxv2f32_nxv2i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vfncvt.f.xu.w v8, v8
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x float> %v
@@ -406,8 +402,8 @@ define <vscale x 2 x double> @vuitofp_nxv2f64_nxv2i8(<vscale x 2 x i8> %va, <vsc
 ; CHECK-LABEL: vuitofp_nxv2f64_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT:    vzext.vf4 v9, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -417,8 +413,8 @@ define <vscale x 2 x double> @vuitofp_nxv2f64_nxv2i8_unmasked(<vscale x 2 x i8>
 ; CHECK-LABEL: vuitofp_nxv2f64_nxv2i8_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf4 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x double> %v
@@ -430,8 +426,8 @@ define <vscale x 2 x double> @vuitofp_nxv2f64_nxv2i16(<vscale x 2 x i16> %va, <v
 ; CHECK-LABEL: vuitofp_nxv2f64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8, v0.t
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT:    vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -441,8 +437,8 @@ define <vscale x 2 x double> @vuitofp_nxv2f64_nxv2i16_unmasked(<vscale x 2 x i16
 ; CHECK-LABEL: vuitofp_nxv2f64_nxv2i16_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x double> %v
@@ -454,8 +450,8 @@ define <vscale x 2 x double> @vuitofp_nxv2f64_nxv2i32(<vscale x 2 x i32> %va, <v
 ; CHECK-LABEL: vuitofp_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x double> %v
@@ -465,8 +461,8 @@ define <vscale x 2 x double> @vuitofp_nxv2f64_nxv2i32_unmasked(<vscale x 2 x i32
 ; CHECK-LABEL: vuitofp_nxv2f64_nxv2i32_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll
index 747fb5a3e7ff8..21211670ede0e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll
@@ -41,13 +41,12 @@ define <vscale x 8 x i64> @vwaddu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x
 define <vscale x 8 x i64> @vwaddu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
 ; CHECK-LABEL: vwaddu_vv_mask_v8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
 ; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmslt.vx v0, v8, a0
-; CHECK-NEXT:    vmv.v.i v12, 0
-; CHECK-NEXT:    vmerge.vvm v20, v12, v8, v0
-; CHECK-NEXT:    vwaddu.vv v8, v20, v16
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vwaddu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
     %mask = icmp slt <vscale x 8 x i32> %x, splat (i32 42)
     %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
@@ -82,8 +81,8 @@ define <vscale x 8 x i64> @vwadd_wv_mask_v8i32_nonzero(<vscale x 8 x i32> %x, <v
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmslt.vx v0, v8, a0
 ; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmerge.vvm v24, v12, v8, v0
-; CHECK-NEXT:    vwadd.wv v8, v16, v24
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vwadd.wv v8, v16, v12
 ; CHECK-NEXT:    ret
     %mask = icmp slt <vscale x 8 x i32> %x, splat (i32 42)
     %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> splat (i32 1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index f94e46771f49c..b6e340c8b90e6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -110,9 +110,8 @@ define <vscale x 2 x i64> @vwadd_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vsc
 ; CHECK-LABEL: vwadd_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwadd.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwadd.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
   %vd = sext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -124,9 +123,8 @@ define <vscale x 2 x i64> @vwaddu_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vs
 ; CHECK-LABEL: vwaddu_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwaddu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwaddu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -138,8 +136,8 @@ define <vscale x 2 x i64> @vwadd_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32
 ; CHECK-LABEL: vwadd_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwadd.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwadd.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -153,8 +151,8 @@ define <vscale x 2 x i64> @vwaddu_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32
 ; CHECK-LABEL: vwaddu_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwaddu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwaddu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -216,9 +214,8 @@ define <vscale x 4 x i64> @vwadd_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vsc
 ; CHECK-LABEL: vwadd_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwadd.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwadd.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -230,9 +227,8 @@ define <vscale x 4 x i64> @vwaddu_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vs
 ; CHECK-LABEL: vwaddu_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwaddu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwaddu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -244,8 +240,8 @@ define <vscale x 4 x i64> @vwadd_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32
 ; CHECK-LABEL: vwadd_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwadd.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwadd.vx v8, v10, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -259,8 +255,8 @@ define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32
 ; CHECK-LABEL: vwaddu_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwaddu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwaddu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -322,9 +318,8 @@ define <vscale x 8 x i64> @vwadd_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vsc
 ; CHECK-LABEL: vwadd_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwadd.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwadd.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -336,9 +331,8 @@ define <vscale x 8 x i64> @vwaddu_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vs
 ; CHECK-LABEL: vwaddu_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwaddu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwaddu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -350,8 +344,8 @@ define <vscale x 8 x i64> @vwadd_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32
 ; CHECK-LABEL: vwadd_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwadd.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwadd.vx v8, v12, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -365,8 +359,8 @@ define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32
 ; CHECK-LABEL: vwaddu_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwaddu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwaddu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -592,9 +586,9 @@ define <vscale x 2 x i64> @vwaddu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 ; CHECK-LABEL: vwaddu_vx_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vwaddu.vx v10, v8, a0
+; CHECK-NEXT:    vwaddu.vx v9, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -664,9 +658,9 @@ define <vscale x 4 x i64> @vwadd_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vsc
 ; CHECK-LABEL: vwadd_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vsext.vf2 v14, v9
-; CHECK-NEXT:    vwadd.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vwadd.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -678,9 +672,9 @@ define <vscale x 4 x i64> @vwaddu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vs
 ; CHECK-LABEL: vwaddu_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vwaddu.vv v12, v8, v9
+; CHECK-NEXT:    vwaddu.vv v10, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -694,9 +688,9 @@ define <vscale x 4 x i64> @vwadd_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vsext.vf2 v14, v9
-; CHECK-NEXT:    vwadd.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vwadd.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -710,9 +704,9 @@ define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 ; CHECK-LABEL: vwaddu_vx_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vwaddu.vx v12, v8, a0
+; CHECK-NEXT:    vwaddu.vx v10, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -750,9 +744,9 @@ define <vscale x 4 x i64> @vwadd_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
 ; CHECK-LABEL: vwadd_wx_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v14, a0
+; CHECK-NEXT:    vmv.v.x v13, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v14
+; CHECK-NEXT:    vsext.vf2 v12, v13
 ; CHECK-NEXT:    vwadd.wv v8, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
@@ -766,9 +760,9 @@ define <vscale x 4 x i64> @vwaddu_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
 ; CHECK-LABEL: vwaddu_wx_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v14, a0
+; CHECK-NEXT:    vmv.v.x v13, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v14
+; CHECK-NEXT:    vzext.vf2 v12, v13
 ; CHECK-NEXT:    vwaddu.wv v8, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
@@ -782,9 +776,9 @@ define <vscale x 8 x i64> @vwadd_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vsc
 ; CHECK-LABEL: vwadd_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vsext.vf2 v20, v10
-; CHECK-NEXT:    vwadd.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v16, v10
+; CHECK-NEXT:    vwadd.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -796,9 +790,9 @@ define <vscale x 8 x i64> @vwaddu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vs
 ; CHECK-LABEL: vwaddu_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vwaddu.vv v16, v8, v10
+; CHECK-NEXT:    vwaddu.vv v12, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -812,9 +806,9 @@ define <vscale x 8 x i64> @vwadd_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vsext.vf2 v20, v10
-; CHECK-NEXT:    vwadd.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v16, v10
+; CHECK-NEXT:    vwadd.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -828,9 +822,9 @@ define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 ; CHECK-LABEL: vwaddu_vx_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vwaddu.vx v16, v8, a0
+; CHECK-NEXT:    vwaddu.vx v12, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -868,9 +862,9 @@ define <vscale x 8 x i64> @vwadd_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
 ; CHECK-LABEL: vwadd_wx_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v20, a0
+; CHECK-NEXT:    vmv.v.x v18, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v20
+; CHECK-NEXT:    vsext.vf2 v16, v18
 ; CHECK-NEXT:    vwadd.wv v8, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
@@ -884,9 +878,9 @@ define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
 ; CHECK-LABEL: vwaddu_wx_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v20, a0
+; CHECK-NEXT:    vmv.v.x v18, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v20
+; CHECK-NEXT:    vzext.vf2 v16, v18
 ; CHECK-NEXT:    vwaddu.wv v8, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
@@ -1136,9 +1130,9 @@ define <vscale x 4 x i64> @vwadd_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscal
 ; CHECK-LABEL: vwadd_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v14, v9
-; CHECK-NEXT:    vwadd.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vwadd.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -1150,9 +1144,9 @@ define <vscale x 4 x i64> @vwaddu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vsca
 ; CHECK-LABEL: vwaddu_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vwaddu.vv v12, v8, v9
+; CHECK-NEXT:    vwaddu.vv v11, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -1166,9 +1160,9 @@ define <vscale x 4 x i64> @vwadd_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v14, v9
-; CHECK-NEXT:    vwadd.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vwadd.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1182,9 +1176,9 @@ define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b
 ; CHECK-LABEL: vwaddu_vx_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vwaddu.vx v12, v8, a0
+; CHECK-NEXT:    vwaddu.vx v11, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1254,9 +1248,9 @@ define <vscale x 8 x i64> @vwadd_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscal
 ; CHECK-LABEL: vwadd_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v20, v9
-; CHECK-NEXT:    vwadd.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vwadd.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1268,9 +1262,9 @@ define <vscale x 8 x i64> @vwaddu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vsca
 ; CHECK-LABEL: vwaddu_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vwaddu.vv v16, v8, v9
+; CHECK-NEXT:    vwaddu.vv v14, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v16
+; CHECK-NEXT:    vzext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1284,9 +1278,9 @@ define <vscale x 8 x i64> @vwadd_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v20, v9
-; CHECK-NEXT:    vwadd.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vwadd.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1300,9 +1294,9 @@ define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b
 ; CHECK-LABEL: vwaddu_vx_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vwaddu.vx v16, v8, a0
+; CHECK-NEXT:    vwaddu.vx v14, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v16
+; CHECK-NEXT:    vzext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1340,9 +1334,9 @@ define <vscale x 8 x i64> @vwadd_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %b
 ; CHECK-LABEL: vwadd_wx_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v20, a0
+; CHECK-NEXT:    vmv.v.x v19, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v20
+; CHECK-NEXT:    vsext.vf4 v16, v19
 ; CHECK-NEXT:    vwadd.wv v8, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
@@ -1356,9 +1350,9 @@ define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %
 ; CHECK-LABEL: vwaddu_wx_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v20, a0
+; CHECK-NEXT:    vmv.v.x v19, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v16, v20
+; CHECK-NEXT:    vzext.vf4 v16, v19
 ; CHECK-NEXT:    vwaddu.wv v8, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
@@ -1514,8 +1508,8 @@ define <vscale x 8 x i64> @vwadd_vx_splat_zext(<vscale x 8 x i32> %va, i32 %b) {
 ; RV64-LABEL: vwadd_vx_splat_zext:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT:    vmv4r.v v16, v8
-; RV64-NEXT:    vwaddu.vx v8, v16, a0
+; RV64-NEXT:    vmv4r.v v12, v8
+; RV64-NEXT:    vwaddu.vx v8, v12, a0
 ; RV64-NEXT:    ret
   %zb = zext i32 %b to i64
   %head = insertelement <vscale x 8 x i64> poison, i64 %zb, i32 0
@@ -1589,8 +1583,8 @@ define <vscale x 8 x i64> @vwadd_vx_splat_sext(<vscale x 8 x i32> %va, i32 %b) {
 ; CHECK-LABEL: vwadd_vx_splat_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwadd.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwadd.vx v8, v12, a0
 ; CHECK-NEXT:    ret
   %sb = sext i32 %b to i64
   %head = insertelement <vscale x 8 x i64> poison, i64 %sb, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.ll
index a0ff37b600043..f7eec803f2197 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.ll
@@ -158,9 +158,8 @@ define <vscale x 8 x i16> @intrinsic_vwadd_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vwadd_vv_nxv8i16_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwadd.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwadd.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwadd.nxv8i16.nxv8i8.nxv8i8(
@@ -207,9 +206,8 @@ define <vscale x 16 x i16> @intrinsic_vwadd_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vwadd_vv_nxv16i16_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwadd.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwadd.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwadd.nxv16i16.nxv16i8.nxv16i8(
@@ -256,9 +254,8 @@ define <vscale x 32 x i16> @intrinsic_vwadd_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
 ; CHECK-LABEL: intrinsic_vwadd_vv_nxv32i16_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwadd.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwadd.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwadd.nxv32i16.nxv32i8.nxv32i8(
@@ -401,9 +398,8 @@ define <vscale x 4 x i32> @intrinsic_vwadd_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwadd_vv_nxv4i32_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwadd.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwadd.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwadd.nxv4i32.nxv4i16.nxv4i16(
@@ -450,9 +446,8 @@ define <vscale x 8 x i32> @intrinsic_vwadd_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwadd_vv_nxv8i32_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwadd.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwadd.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwadd.nxv8i32.nxv8i16.nxv8i16(
@@ -499,9 +494,8 @@ define <vscale x 16 x i32> @intrinsic_vwadd_vv_nxv16i32_nxv16i16_nxv16i16(<vscal
 ; CHECK-LABEL: intrinsic_vwadd_vv_nxv16i32_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwadd.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwadd.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwadd.nxv16i32.nxv16i16.nxv16i16(
@@ -596,9 +590,8 @@ define <vscale x 2 x i64> @intrinsic_vwadd_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwadd_vv_nxv2i64_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwadd.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwadd.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwadd.nxv2i64.nxv2i32.nxv2i32(
@@ -645,9 +638,8 @@ define <vscale x 4 x i64> @intrinsic_vwadd_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwadd_vv_nxv4i64_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwadd.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwadd.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwadd.nxv4i64.nxv4i32.nxv4i32(
@@ -694,9 +686,8 @@ define <vscale x 8 x i64> @intrinsic_vwadd_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwadd_vv_nxv8i64_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwadd.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwadd.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwadd.nxv8i64.nxv8i32.nxv8i32(
@@ -887,8 +878,8 @@ define <vscale x 8 x i16> @intrinsic_vwadd_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x i8
 ; CHECK-LABEL: intrinsic_vwadd_vx_nxv8i16_nxv8i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwadd.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwadd.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwadd.nxv8i16.nxv8i8.i8(
@@ -935,8 +926,8 @@ define <vscale x 16 x i16> @intrinsic_vwadd_vx_nxv16i16_nxv16i8_i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vwadd_vx_nxv16i16_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwadd.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwadd.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwadd.nxv16i16.nxv16i8.i8(
@@ -983,8 +974,8 @@ define <vscale x 32 x i16> @intrinsic_vwadd_vx_nxv32i16_nxv32i8_i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vwadd_vx_nxv32i16_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwadd.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwadd.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwadd.nxv32i16.nxv32i8.i8(
@@ -1127,8 +1118,8 @@ define <vscale x 4 x i32> @intrinsic_vwadd_vx_nxv4i32_nxv4i16_i16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vwadd_vx_nxv4i32_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwadd.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwadd.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwadd.nxv4i32.nxv4i16.i16(
@@ -1175,8 +1166,8 @@ define <vscale x 8 x i32> @intrinsic_vwadd_vx_nxv8i32_nxv8i16_i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwadd_vx_nxv8i32_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwadd.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwadd.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwadd.nxv8i32.nxv8i16.i16(
@@ -1223,8 +1214,8 @@ define <vscale x 16 x i32> @intrinsic_vwadd_vx_nxv16i32_nxv16i16_i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vwadd_vx_nxv16i32_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwadd.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwadd.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwadd.nxv16i32.nxv16i16.i16(
@@ -1319,8 +1310,8 @@ define <vscale x 2 x i64> @intrinsic_vwadd_vx_nxv2i64_nxv2i32_i32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vwadd_vx_nxv2i64_nxv2i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwadd.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwadd.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwadd.nxv2i64.nxv2i32.i32(
@@ -1367,8 +1358,8 @@ define <vscale x 4 x i64> @intrinsic_vwadd_vx_nxv4i64_nxv4i32_i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vwadd_vx_nxv4i64_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwadd.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwadd.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwadd.nxv4i64.nxv4i32.i32(
@@ -1415,8 +1406,8 @@ define <vscale x 8 x i64> @intrinsic_vwadd_vx_nxv8i64_nxv8i32_i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwadd_vx_nxv8i64_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwadd.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwadd.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwadd.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll
index 3850261e2117a..8c8d0eae5898e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll
@@ -1986,8 +1986,8 @@ define <vscale x 8 x i16> @intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vs
 ; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vwadd.wv v8, v10, v12
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwadd.wv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8(
@@ -2003,8 +2003,8 @@ define <vscale x 16 x i16> @intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8
 ; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vwadd.wv v8, v12, v16
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwadd.wv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8(
@@ -2020,8 +2020,8 @@ define <vscale x 32 x i16> @intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8
 ; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v24, v8
-; CHECK-NEXT:    vwadd.wv v8, v16, v24
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwadd.wv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8(
@@ -2071,8 +2071,8 @@ define <vscale x 4 x i32> @intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<v
 ; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vwadd.wv v8, v10, v12
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwadd.wv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16(
@@ -2088,8 +2088,8 @@ define <vscale x 8 x i32> @intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<v
 ; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vwadd.wv v8, v12, v16
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwadd.wv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16(
@@ -2122,8 +2122,8 @@ define <vscale x 2 x i64> @intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<v
 ; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vwadd.wv v8, v10, v12
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwadd.wv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32(
@@ -2139,8 +2139,8 @@ define <vscale x 4 x i64> @intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<v
 ; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vwadd.wv v8, v12, v16
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwadd.wv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32(
@@ -2156,8 +2156,8 @@ define <vscale x 8 x i64> @intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<v
 ; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v24, v8
-; CHECK-NEXT:    vwadd.wv v8, v16, v24
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwadd.wv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.ll
index 4f82844c0edfc..693d0e13592a1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.ll
@@ -158,9 +158,8 @@ define <vscale x 8 x i16> @intrinsic_vwaddu_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vwaddu_vv_nxv8i16_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwaddu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwaddu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.nxv8i16.nxv8i8.nxv8i8(
@@ -207,9 +206,8 @@ define <vscale x 16 x i16> @intrinsic_vwaddu_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vwaddu_vv_nxv16i16_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwaddu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwaddu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.nxv16i16.nxv16i8.nxv16i8(
@@ -256,9 +254,8 @@ define <vscale x 32 x i16> @intrinsic_vwaddu_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
 ; CHECK-LABEL: intrinsic_vwaddu_vv_nxv32i16_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwaddu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwaddu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.nxv32i16.nxv32i8.nxv32i8(
@@ -401,9 +398,8 @@ define <vscale x 4 x i32> @intrinsic_vwaddu_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwaddu_vv_nxv4i32_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwaddu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwaddu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.nxv4i32.nxv4i16.nxv4i16(
@@ -450,9 +446,8 @@ define <vscale x 8 x i32> @intrinsic_vwaddu_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwaddu_vv_nxv8i32_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwaddu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwaddu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.nxv8i32.nxv8i16.nxv8i16(
@@ -499,9 +494,8 @@ define <vscale x 16 x i32> @intrinsic_vwaddu_vv_nxv16i32_nxv16i16_nxv16i16(<vsca
 ; CHECK-LABEL: intrinsic_vwaddu_vv_nxv16i32_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwaddu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwaddu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwaddu.nxv16i32.nxv16i16.nxv16i16(
@@ -596,9 +590,8 @@ define <vscale x 2 x i64> @intrinsic_vwaddu_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwaddu_vv_nxv2i64_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwaddu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwaddu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.nxv2i64.nxv2i32.nxv2i32(
@@ -645,9 +638,8 @@ define <vscale x 4 x i64> @intrinsic_vwaddu_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwaddu_vv_nxv4i64_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwaddu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwaddu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.nxv4i64.nxv4i32.nxv4i32(
@@ -694,9 +686,8 @@ define <vscale x 8 x i64> @intrinsic_vwaddu_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwaddu_vv_nxv8i64_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwaddu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwaddu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.nxv8i64.nxv8i32.nxv8i32(
@@ -887,8 +878,8 @@ define <vscale x 8 x i16> @intrinsic_vwaddu_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x i
 ; CHECK-LABEL: intrinsic_vwaddu_vx_nxv8i16_nxv8i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwaddu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwaddu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.nxv8i16.nxv8i8.i8(
@@ -935,8 +926,8 @@ define <vscale x 16 x i16> @intrinsic_vwaddu_vx_nxv16i16_nxv16i8_i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vwaddu_vx_nxv16i16_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwaddu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwaddu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.nxv16i16.nxv16i8.i8(
@@ -983,8 +974,8 @@ define <vscale x 32 x i16> @intrinsic_vwaddu_vx_nxv32i16_nxv32i8_i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vwaddu_vx_nxv32i16_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwaddu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwaddu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.nxv32i16.nxv32i8.i8(
@@ -1127,8 +1118,8 @@ define <vscale x 4 x i32> @intrinsic_vwaddu_vx_nxv4i32_nxv4i16_i16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vwaddu_vx_nxv4i32_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwaddu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwaddu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.nxv4i32.nxv4i16.i16(
@@ -1175,8 +1166,8 @@ define <vscale x 8 x i32> @intrinsic_vwaddu_vx_nxv8i32_nxv8i16_i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwaddu_vx_nxv8i32_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwaddu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwaddu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.nxv8i32.nxv8i16.i16(
@@ -1223,8 +1214,8 @@ define <vscale x 16 x i32> @intrinsic_vwaddu_vx_nxv16i32_nxv16i16_i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwaddu_vx_nxv16i32_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwaddu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwaddu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwaddu.nxv16i32.nxv16i16.i16(
@@ -1319,8 +1310,8 @@ define <vscale x 2 x i64> @intrinsic_vwaddu_vx_nxv2i64_nxv2i32_i32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vwaddu_vx_nxv2i64_nxv2i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwaddu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwaddu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.nxv2i64.nxv2i32.i32(
@@ -1367,8 +1358,8 @@ define <vscale x 4 x i64> @intrinsic_vwaddu_vx_nxv4i64_nxv4i32_i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vwaddu_vx_nxv4i64_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwaddu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwaddu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.nxv4i64.nxv4i32.i32(
@@ -1415,8 +1406,8 @@ define <vscale x 8 x i64> @intrinsic_vwaddu_vx_nxv8i64_nxv8i32_i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwaddu_vx_nxv8i64_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwaddu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwaddu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll
index 8af403063f80a..f591830b63a7e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll
@@ -1982,8 +1982,8 @@ define <vscale x 8 x i16> @intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<v
 ; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vwaddu.wv v8, v10, v12
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwaddu.wv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8(
@@ -1999,8 +1999,8 @@ define <vscale x 16 x i16> @intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i
 ; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vwaddu.wv v8, v12, v16
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwaddu.wv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8(
@@ -2016,8 +2016,8 @@ define <vscale x 32 x i16> @intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i
 ; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v24, v8
-; CHECK-NEXT:    vwaddu.wv v8, v16, v24
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwaddu.wv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8(
@@ -2067,8 +2067,8 @@ define <vscale x 4 x i32> @intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<
 ; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vwaddu.wv v8, v10, v12
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwaddu.wv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16(
@@ -2084,8 +2084,8 @@ define <vscale x 8 x i32> @intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<
 ; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vwaddu.wv v8, v12, v16
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwaddu.wv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16(
@@ -2118,8 +2118,8 @@ define <vscale x 2 x i64> @intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<
 ; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vwaddu.wv v8, v10, v12
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwaddu.wv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32(
@@ -2135,8 +2135,8 @@ define <vscale x 4 x i64> @intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<
 ; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vwaddu.wv v8, v12, v16
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwaddu.wv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32(
@@ -2152,8 +2152,8 @@ define <vscale x 8 x i64> @intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<
 ; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v24, v8
-; CHECK-NEXT:    vwaddu.wv v8, v16, v24
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwaddu.wv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
index b55a7f610aad5..4de5d6374515e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
@@ -90,9 +90,8 @@ define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vsc
 ; CHECK-LABEL: vwmul_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwmul.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwmul.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
   %vd = sext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -104,9 +103,8 @@ define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vs
 ; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwmulu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwmulu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -118,9 +116,8 @@ define <vscale x 2 x i64> @vwmulsu_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <v
 ; CHECK-LABEL: vwmulsu_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -132,8 +129,8 @@ define <vscale x 2 x i64> @vwmul_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32
 ; CHECK-LABEL: vwmul_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwmul.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwmul.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -147,8 +144,8 @@ define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32
 ; CHECK-LABEL: vwmulu_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -162,8 +159,8 @@ define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i3
 ; CHECK-LABEL: vwmulsu_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwmulsu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -177,9 +174,8 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vsc
 ; CHECK-LABEL: vwmul_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwmul.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwmul.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -191,9 +187,8 @@ define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vs
 ; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwmulu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwmulu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -205,9 +200,8 @@ define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <v
 ; CHECK-LABEL: vwmulsu_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -219,8 +213,8 @@ define <vscale x 4 x i64> @vwmul_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32
 ; CHECK-LABEL: vwmul_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwmul.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwmul.vx v8, v10, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -234,8 +228,8 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32
 ; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwmulu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwmulu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -249,8 +243,8 @@ define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i3
 ; CHECK-LABEL: vwmulsu_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwmulsu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwmulsu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -264,9 +258,8 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vsc
 ; CHECK-LABEL: vwmul_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwmul.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwmul.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -278,9 +271,8 @@ define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vs
 ; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwmulu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwmulu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -292,9 +284,8 @@ define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <v
 ; CHECK-LABEL: vwmulsu_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -306,8 +297,8 @@ define <vscale x 8 x i64> @vwmul_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32
 ; CHECK-LABEL: vwmul_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwmul.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwmul.vx v8, v12, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -321,8 +312,8 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32
 ; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwmulu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwmulu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -336,8 +327,8 @@ define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i3
 ; CHECK-LABEL: vwmulsu_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwmulsu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwmulsu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -505,9 +496,9 @@ define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 ; CHECK-LABEL: vwmulu_vx_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vwmulu.vx v10, v8, a0
+; CHECK-NEXT:    vwmulu.vx v9, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -539,9 +530,9 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vsc
 ; CHECK-LABEL: vwmul_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vsext.vf2 v14, v9
-; CHECK-NEXT:    vwmul.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -553,9 +544,9 @@ define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vs
 ; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vwmulu.vv v12, v8, v9
+; CHECK-NEXT:    vwmulu.vv v10, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -567,9 +558,9 @@ define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <v
 ; CHECK-LABEL: vwmulsu_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vzext.vf2 v14, v9
-; CHECK-NEXT:    vwmulsu.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -583,9 +574,9 @@ define <vscale x 4 x i64> @vwmul_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vsext.vf2 v14, v9
-; CHECK-NEXT:    vwmul.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -599,9 +590,9 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 ; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vwmulu.vx v12, v8, a0
+; CHECK-NEXT:    vwmulu.vx v10, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -617,9 +608,9 @@ define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i1
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vzext.vf2 v14, v9
-; CHECK-NEXT:    vwmulsu.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -633,9 +624,9 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vsc
 ; CHECK-LABEL: vwmul_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vsext.vf2 v20, v10
-; CHECK-NEXT:    vwmul.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v16, v10
+; CHECK-NEXT:    vwmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -647,9 +638,9 @@ define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vs
 ; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vwmulu.vv v16, v8, v10
+; CHECK-NEXT:    vwmulu.vv v12, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -661,9 +652,9 @@ define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <v
 ; CHECK-LABEL: vwmulsu_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vzext.vf2 v20, v10
-; CHECK-NEXT:    vwmulsu.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v16, v10
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -677,9 +668,9 @@ define <vscale x 8 x i64> @vwmul_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vsext.vf2 v20, v10
-; CHECK-NEXT:    vwmul.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v16, v10
+; CHECK-NEXT:    vwmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -693,9 +684,9 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 ; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vwmulu.vx v16, v8, a0
+; CHECK-NEXT:    vwmulu.vx v12, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -711,9 +702,9 @@ define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i1
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vzext.vf2 v20, v10
-; CHECK-NEXT:    vwmulsu.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v16, v10
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -915,9 +906,9 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscal
 ; CHECK-LABEL: vwmul_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v14, v9
-; CHECK-NEXT:    vwmul.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -929,9 +920,9 @@ define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vsca
 ; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vwmulu.vv v12, v8, v9
+; CHECK-NEXT:    vwmulu.vv v11, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -943,9 +934,9 @@ define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vsc
 ; CHECK-LABEL: vwmulsu_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vzext.vf4 v14, v9
-; CHECK-NEXT:    vwmulsu.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -959,9 +950,9 @@ define <vscale x 4 x i64> @vwmul_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v14, v9
-; CHECK-NEXT:    vwmul.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -975,9 +966,9 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b
 ; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vwmulu.vx v12, v8, a0
+; CHECK-NEXT:    vwmulu.vx v11, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -993,9 +984,9 @@ define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vzext.vf4 v14, v9
-; CHECK-NEXT:    vwmulsu.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -1009,9 +1000,9 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscal
 ; CHECK-LABEL: vwmul_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v20, v9
-; CHECK-NEXT:    vwmul.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vwmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1023,9 +1014,9 @@ define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vsca
 ; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vwmulu.vv v16, v8, v9
+; CHECK-NEXT:    vwmulu.vv v14, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v16
+; CHECK-NEXT:    vzext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1037,9 +1028,9 @@ define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vsc
 ; CHECK-LABEL: vwmulsu_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vzext.vf4 v20, v9
-; CHECK-NEXT:    vwmulsu.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v16, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1053,9 +1044,9 @@ define <vscale x 8 x i64> @vwmul_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v20, v9
-; CHECK-NEXT:    vwmul.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vwmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -1069,9 +1060,9 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b
 ; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vwmulu.vx v16, v8, a0
+; CHECK-NEXT:    vwmulu.vx v14, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v8, v16
+; CHECK-NEXT:    vzext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -1087,9 +1078,9 @@ define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vzext.vf4 v20, v9
-; CHECK-NEXT:    vwmulsu.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v16, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul.ll
index 3c8b6c8c4b561..62ae0b8782ca8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmul.ll
@@ -158,9 +158,8 @@ define <vscale x 8 x i16> @intrinsic_vwmul_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vwmul_vv_nxv8i16_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwmul.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwmul.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwmul.nxv8i16.nxv8i8.nxv8i8(
@@ -207,9 +206,8 @@ define <vscale x 16 x i16> @intrinsic_vwmul_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vwmul_vv_nxv16i16_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwmul.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwmul.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwmul.nxv16i16.nxv16i8.nxv16i8(
@@ -256,9 +254,8 @@ define <vscale x 32 x i16> @intrinsic_vwmul_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
 ; CHECK-LABEL: intrinsic_vwmul_vv_nxv32i16_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwmul.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwmul.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwmul.nxv32i16.nxv32i8.nxv32i8(
@@ -401,9 +398,8 @@ define <vscale x 4 x i32> @intrinsic_vwmul_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwmul_vv_nxv4i32_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwmul.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwmul.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwmul.nxv4i32.nxv4i16.nxv4i16(
@@ -450,9 +446,8 @@ define <vscale x 8 x i32> @intrinsic_vwmul_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwmul_vv_nxv8i32_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwmul.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwmul.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwmul.nxv8i32.nxv8i16.nxv8i16(
@@ -499,9 +494,8 @@ define <vscale x 16 x i32> @intrinsic_vwmul_vv_nxv16i32_nxv16i16_nxv16i16(<vscal
 ; CHECK-LABEL: intrinsic_vwmul_vv_nxv16i32_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwmul.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwmul.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwmul.nxv16i32.nxv16i16.nxv16i16(
@@ -596,9 +590,8 @@ define <vscale x 2 x i64> @intrinsic_vwmul_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwmul_vv_nxv2i64_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwmul.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwmul.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwmul.nxv2i64.nxv2i32.nxv2i32(
@@ -645,9 +638,8 @@ define <vscale x 4 x i64> @intrinsic_vwmul_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwmul_vv_nxv4i64_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwmul.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwmul.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwmul.nxv4i64.nxv4i32.nxv4i32(
@@ -694,9 +686,8 @@ define <vscale x 8 x i64> @intrinsic_vwmul_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwmul_vv_nxv8i64_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwmul.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwmul.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwmul.nxv8i64.nxv8i32.nxv8i32(
@@ -887,8 +878,8 @@ define <vscale x 8 x i16> @intrinsic_vwmul_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x i8
 ; CHECK-LABEL: intrinsic_vwmul_vx_nxv8i16_nxv8i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwmul.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwmul.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwmul.nxv8i16.nxv8i8.i8(
@@ -935,8 +926,8 @@ define <vscale x 16 x i16> @intrinsic_vwmul_vx_nxv16i16_nxv16i8_i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vwmul_vx_nxv16i16_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwmul.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwmul.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwmul.nxv16i16.nxv16i8.i8(
@@ -983,8 +974,8 @@ define <vscale x 32 x i16> @intrinsic_vwmul_vx_nxv32i16_nxv32i8_i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vwmul_vx_nxv32i16_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwmul.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwmul.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwmul.nxv32i16.nxv32i8.i8(
@@ -1127,8 +1118,8 @@ define <vscale x 4 x i32> @intrinsic_vwmul_vx_nxv4i32_nxv4i16_i16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vwmul_vx_nxv4i32_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwmul.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwmul.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwmul.nxv4i32.nxv4i16.i16(
@@ -1175,8 +1166,8 @@ define <vscale x 8 x i32> @intrinsic_vwmul_vx_nxv8i32_nxv8i16_i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwmul_vx_nxv8i32_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwmul.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwmul.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwmul.nxv8i32.nxv8i16.i16(
@@ -1223,8 +1214,8 @@ define <vscale x 16 x i32> @intrinsic_vwmul_vx_nxv16i32_nxv16i16_i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vwmul_vx_nxv16i32_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwmul.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwmul.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwmul.nxv16i32.nxv16i16.i16(
@@ -1319,8 +1310,8 @@ define <vscale x 2 x i64> @intrinsic_vwmul_vx_nxv2i64_nxv2i32_i32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vwmul_vx_nxv2i64_nxv2i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwmul.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwmul.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwmul.nxv2i64.nxv2i32.i32(
@@ -1367,8 +1358,8 @@ define <vscale x 4 x i64> @intrinsic_vwmul_vx_nxv4i64_nxv4i32_i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vwmul_vx_nxv4i64_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwmul.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwmul.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwmul.nxv4i64.nxv4i32.i32(
@@ -1415,8 +1406,8 @@ define <vscale x 8 x i64> @intrinsic_vwmul_vx_nxv8i64_nxv8i32_i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwmul_vx_nxv8i64_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwmul.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwmul.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwmul.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/vwmulsu.ll
index fa0f935513493..448b117a98c36 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmulsu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmulsu.ll
@@ -158,9 +158,8 @@ define <vscale x 8 x i16> @intrinsic_vwmulsu_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x
 ; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv8i16_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwmulsu.nxv8i16.nxv8i8.nxv8i8(
@@ -207,9 +206,8 @@ define <vscale x 16 x i16> @intrinsic_vwmulsu_vv_nxv16i16_nxv16i8_nxv16i8(<vscal
 ; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv16i16_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwmulsu.nxv16i16.nxv16i8.nxv16i8(
@@ -256,9 +254,8 @@ define <vscale x 32 x i16> @intrinsic_vwmulsu_vv_nxv32i16_nxv32i8_nxv32i8(<vscal
 ; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv32i16_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwmulsu.nxv32i16.nxv32i8.nxv32i8(
@@ -401,9 +398,8 @@ define <vscale x 4 x i32> @intrinsic_vwmulsu_vv_nxv4i32_nxv4i16_nxv4i16(<vscale
 ; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv4i32_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwmulsu.nxv4i32.nxv4i16.nxv4i16(
@@ -450,9 +446,8 @@ define <vscale x 8 x i32> @intrinsic_vwmulsu_vv_nxv8i32_nxv8i16_nxv8i16(<vscale
 ; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv8i32_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwmulsu.nxv8i32.nxv8i16.nxv8i16(
@@ -499,9 +494,8 @@ define <vscale x 16 x i32> @intrinsic_vwmulsu_vv_nxv16i32_nxv16i16_nxv16i16(<vsc
 ; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv16i32_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwmulsu.nxv16i32.nxv16i16.nxv16i16(
@@ -596,9 +590,8 @@ define <vscale x 2 x i64> @intrinsic_vwmulsu_vv_nxv2i64_nxv2i32_nxv2i32(<vscale
 ; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv2i64_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwmulsu.nxv2i64.nxv2i32.nxv2i32(
@@ -645,9 +638,8 @@ define <vscale x 4 x i64> @intrinsic_vwmulsu_vv_nxv4i64_nxv4i32_nxv4i32(<vscale
 ; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv4i64_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwmulsu.nxv4i64.nxv4i32.nxv4i32(
@@ -694,9 +686,8 @@ define <vscale x 8 x i64> @intrinsic_vwmulsu_vv_nxv8i64_nxv8i32_nxv8i32(<vscale
 ; CHECK-LABEL: intrinsic_vwmulsu_vv_nxv8i64_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwmulsu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwmulsu.nxv8i64.nxv8i32.nxv8i32(
@@ -887,8 +878,8 @@ define <vscale x 8 x i16> @intrinsic_vwmulsu_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv8i16_nxv8i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwmulsu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwmulsu.nxv8i16.nxv8i8.i8(
@@ -935,8 +926,8 @@ define <vscale x 16 x i16> @intrinsic_vwmulsu_vx_nxv16i16_nxv16i8_i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv16i16_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwmulsu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwmulsu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwmulsu.nxv16i16.nxv16i8.i8(
@@ -983,8 +974,8 @@ define <vscale x 32 x i16> @intrinsic_vwmulsu_vx_nxv32i16_nxv32i8_i8(<vscale x 3
 ; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv32i16_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwmulsu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwmulsu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwmulsu.nxv32i16.nxv32i8.i8(
@@ -1127,8 +1118,8 @@ define <vscale x 4 x i32> @intrinsic_vwmulsu_vx_nxv4i32_nxv4i16_i16(<vscale x 4
 ; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv4i32_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwmulsu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwmulsu.nxv4i32.nxv4i16.i16(
@@ -1175,8 +1166,8 @@ define <vscale x 8 x i32> @intrinsic_vwmulsu_vx_nxv8i32_nxv8i16_i16(<vscale x 8
 ; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv8i32_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwmulsu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwmulsu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwmulsu.nxv8i32.nxv8i16.i16(
@@ -1223,8 +1214,8 @@ define <vscale x 16 x i32> @intrinsic_vwmulsu_vx_nxv16i32_nxv16i16_i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv16i32_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwmulsu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwmulsu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwmulsu.nxv16i32.nxv16i16.i16(
@@ -1319,8 +1310,8 @@ define <vscale x 2 x i64> @intrinsic_vwmulsu_vx_nxv2i64_nxv2i32_i32(<vscale x 2
 ; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv2i64_nxv2i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwmulsu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwmulsu.nxv2i64.nxv2i32.i32(
@@ -1367,8 +1358,8 @@ define <vscale x 4 x i64> @intrinsic_vwmulsu_vx_nxv4i64_nxv4i32_i32(<vscale x 4
 ; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv4i64_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwmulsu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwmulsu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwmulsu.nxv4i64.nxv4i32.i32(
@@ -1415,8 +1406,8 @@ define <vscale x 8 x i64> @intrinsic_vwmulsu_vx_nxv8i64_nxv8i32_i32(<vscale x 8
 ; CHECK-LABEL: intrinsic_vwmulsu_vx_nxv8i64_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwmulsu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwmulsu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwmulsu.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/vwmulu.ll
index 8c9f245921b66..6500ba9fe6df0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmulu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmulu.ll
@@ -158,9 +158,8 @@ define <vscale x 8 x i16> @intrinsic_vwmulu_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vwmulu_vv_nxv8i16_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwmulu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwmulu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwmulu.nxv8i16.nxv8i8.nxv8i8(
@@ -207,9 +206,8 @@ define <vscale x 16 x i16> @intrinsic_vwmulu_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vwmulu_vv_nxv16i16_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwmulu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwmulu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwmulu.nxv16i16.nxv16i8.nxv16i8(
@@ -256,9 +254,8 @@ define <vscale x 32 x i16> @intrinsic_vwmulu_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
 ; CHECK-LABEL: intrinsic_vwmulu_vv_nxv32i16_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwmulu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwmulu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwmulu.nxv32i16.nxv32i8.nxv32i8(
@@ -401,9 +398,8 @@ define <vscale x 4 x i32> @intrinsic_vwmulu_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwmulu_vv_nxv4i32_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwmulu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwmulu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwmulu.nxv4i32.nxv4i16.nxv4i16(
@@ -450,9 +446,8 @@ define <vscale x 8 x i32> @intrinsic_vwmulu_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwmulu_vv_nxv8i32_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwmulu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwmulu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwmulu.nxv8i32.nxv8i16.nxv8i16(
@@ -499,9 +494,8 @@ define <vscale x 16 x i32> @intrinsic_vwmulu_vv_nxv16i32_nxv16i16_nxv16i16(<vsca
 ; CHECK-LABEL: intrinsic_vwmulu_vv_nxv16i32_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwmulu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwmulu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwmulu.nxv16i32.nxv16i16.nxv16i16(
@@ -596,9 +590,8 @@ define <vscale x 2 x i64> @intrinsic_vwmulu_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwmulu_vv_nxv2i64_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwmulu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwmulu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwmulu.nxv2i64.nxv2i32.nxv2i32(
@@ -645,9 +638,8 @@ define <vscale x 4 x i64> @intrinsic_vwmulu_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwmulu_vv_nxv4i64_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwmulu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwmulu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwmulu.nxv4i64.nxv4i32.nxv4i32(
@@ -694,9 +686,8 @@ define <vscale x 8 x i64> @intrinsic_vwmulu_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwmulu_vv_nxv8i64_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwmulu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwmulu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwmulu.nxv8i64.nxv8i32.nxv8i32(
@@ -887,8 +878,8 @@ define <vscale x 8 x i16> @intrinsic_vwmulu_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x i
 ; CHECK-LABEL: intrinsic_vwmulu_vx_nxv8i16_nxv8i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwmulu.nxv8i16.nxv8i8.i8(
@@ -935,8 +926,8 @@ define <vscale x 16 x i16> @intrinsic_vwmulu_vx_nxv16i16_nxv16i8_i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vwmulu_vx_nxv16i16_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwmulu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwmulu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwmulu.nxv16i16.nxv16i8.i8(
@@ -983,8 +974,8 @@ define <vscale x 32 x i16> @intrinsic_vwmulu_vx_nxv32i16_nxv32i8_i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vwmulu_vx_nxv32i16_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwmulu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwmulu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwmulu.nxv32i16.nxv32i8.i8(
@@ -1127,8 +1118,8 @@ define <vscale x 4 x i32> @intrinsic_vwmulu_vx_nxv4i32_nxv4i16_i16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vwmulu_vx_nxv4i32_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwmulu.nxv4i32.nxv4i16.i16(
@@ -1175,8 +1166,8 @@ define <vscale x 8 x i32> @intrinsic_vwmulu_vx_nxv8i32_nxv8i16_i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwmulu_vx_nxv8i32_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwmulu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwmulu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwmulu.nxv8i32.nxv8i16.i16(
@@ -1223,8 +1214,8 @@ define <vscale x 16 x i32> @intrinsic_vwmulu_vx_nxv16i32_nxv16i16_i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwmulu_vx_nxv16i32_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwmulu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwmulu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwmulu.nxv16i32.nxv16i16.i16(
@@ -1319,8 +1310,8 @@ define <vscale x 2 x i64> @intrinsic_vwmulu_vx_nxv2i64_nxv2i32_i32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vwmulu_vx_nxv2i64_nxv2i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwmulu.nxv2i64.nxv2i32.i32(
@@ -1367,8 +1358,8 @@ define <vscale x 4 x i64> @intrinsic_vwmulu_vx_nxv4i64_nxv4i32_i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vwmulu_vx_nxv4i64_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwmulu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwmulu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwmulu.nxv4i64.nxv4i32.i32(
@@ -1415,8 +1406,8 @@ define <vscale x 8 x i64> @intrinsic_vwmulu_vx_nxv8i64_nxv8i32_i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwmulu_vx_nxv8i64_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwmulu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwmulu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwmulu.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
index 381f57e59aa76..e8d21c9f760de 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
@@ -13,16 +13,15 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_sext(<vscale x 2 x i32> %a, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
   %y = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -35,16 +34,15 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_zext(<vscale x 2 x i32> %a, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
   %y = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -63,8 +61,8 @@ define <vscale x 2 x i64> @vwsll_vx_i64_nxv2i64(<vscale x 2 x i32> %a, i64 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i64_nxv2i64:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
@@ -77,18 +75,18 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_sext(<vscale x 2 x i32> %a, i32
 ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -102,18 +100,18 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_zext(<vscale x 2 x i32> %a, i32
 ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -137,8 +135,8 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_sext(<vscale x 2 x i32> %a, i16
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -162,8 +160,8 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_zext(<vscale x 2 x i32> %a, i16
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -187,8 +185,8 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_sext(<vscale x 2 x i32> %a, i8 %b
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -212,8 +210,8 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -227,16 +225,16 @@ define <vscale x 2 x i64> @vwsll_vi_nxv2i64(<vscale x 2 x i32> %a) {
 ; CHECK-LABEL: vwsll_vi_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vi v8, v9, 2
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
   %z = shl <vscale x 2 x i64> %x, splat (i64 2)
@@ -252,16 +250,15 @@ define <vscale x 4 x i32> @vwsll_vv_nxv4i32_sext(<vscale x 4 x i16> %a, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv4i32_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
   %y = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -274,16 +271,15 @@ define <vscale x 4 x i32> @vwsll_vv_nxv4i32_zext(<vscale x 4 x i16> %a, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv4i32_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
   %y = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -302,8 +298,8 @@ define <vscale x 4 x i32> @vwsll_vx_i64_nxv4i32(<vscale x 4 x i16> %a, i64 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i64_nxv4i32:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -324,8 +320,8 @@ define <vscale x 4 x i32> @vwsll_vx_i32_nxv4i32(<vscale x 4 x i16> %a, i32 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv4i32:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -338,18 +334,18 @@ define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_sext(<vscale x 4 x i16> %a, i16
 ; CHECK-LABEL: vwsll_vx_i16_nxv4i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv4i32_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -363,18 +359,18 @@ define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_zext(<vscale x 4 x i16> %a, i16
 ; CHECK-LABEL: vwsll_vx_i16_nxv4i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv4i32_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -398,8 +394,8 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_sext(<vscale x 4 x i16> %a, i8 %b
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv4i32_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -423,8 +419,8 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_zext(<vscale x 4 x i16> %a, i8 %b
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv4i32_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -438,16 +434,16 @@ define <vscale x 4 x i32> @vwsll_vi_nxv4i32(<vscale x 4 x i16> %a) {
 ; CHECK-LABEL: vwsll_vi_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv4i32:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vi v8, v9, 2
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
   %z = shl <vscale x 4 x i32> %x, splat (i32 2)
@@ -463,16 +459,15 @@ define <vscale x 8 x i16> @vwsll_vv_nxv8i16_sext(<vscale x 8 x i8> %a, <vscale x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv8i16_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
   %y = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -485,16 +480,15 @@ define <vscale x 8 x i16> @vwsll_vv_nxv8i16_zext(<vscale x 8 x i8> %a, <vscale x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv8i16_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
   %y = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -513,8 +507,8 @@ define <vscale x 8 x i16> @vwsll_vx_i64_nxv8i16(<vscale x 8 x i8> %a, i64 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i64_nxv8i16:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -535,8 +529,8 @@ define <vscale x 8 x i16> @vwsll_vx_i32_nxv8i16(<vscale x 8 x i8> %a, i32 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv8i16:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -557,8 +551,8 @@ define <vscale x 8 x i16> @vwsll_vx_i16_nxv8i16(<vscale x 8 x i8> %a, i16 %b) {
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv8i16:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -571,18 +565,18 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_sext(<vscale x 8 x i8> %a, i8 %b)
 ; CHECK-LABEL: vwsll_vx_i8_nxv8i16_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv8i16_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -596,18 +590,18 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_zext(<vscale x 8 x i8> %a, i8 %b)
 ; CHECK-LABEL: vwsll_vx_i8_nxv8i16_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv8i16_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -621,16 +615,16 @@ define <vscale x 8 x i16> @vwsll_vi_nxv8i16(<vscale x 8 x i8> %a) {
 ; CHECK-LABEL: vwsll_vi_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv8i16:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vi v8, v9, 2
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
   %z = shl <vscale x 8 x i16> %x, splat (i16 2)
@@ -710,20 +704,20 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_nxv2i8_sext(<vscale x 2 x i8> %a
 ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v12, a0
+; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
 ; CHECK-ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vsext.vf2 v8, v12
+; CHECK-ZVBB-NEXT:    vsext.vf2 v8, v9
 ; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
@@ -738,18 +732,18 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a
 ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vzext.vf4 v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -801,8 +795,8 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vzext.vf4 v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -854,8 +848,8 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a,
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT:    vzext.vf4 v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -876,8 +870,8 @@ define <vscale x 2 x i64> @vwsll_vi_nxv2i64_nxv2i8(<vscale x 2 x i8> %a) {
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64_nxv2i8:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT:    vzext.vf4 v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vi v8, v9, 2
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
   %z = shl <vscale x 2 x i64> %x, splat (i64 2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
index 87bb5fa0238ce..251e041539e4c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
@@ -15,16 +15,15 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_sext(<vscale x 2 x i32> %a, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vsext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
   %y = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -37,16 +36,15 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_zext(<vscale x 2 x i32> %a, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vzext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
   %y = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -72,15 +70,15 @@ define <vscale x 2 x i64> @vwsll_vx_i64_nxv2i64(<vscale x 2 x i32> %a, i64 %b, <
 ; CHECK-ZVBB32-LABEL: vwsll_vx_i64_nxv2i64:
 ; CHECK-ZVBB32:       # %bb.0:
 ; CHECK-ZVBB32-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
-; CHECK-ZVBB32-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB32-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB32-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB32-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB32-NEXT:    ret
 ;
 ; CHECK-ZVBB64-LABEL: vwsll_vx_i64_nxv2i64:
 ; CHECK-ZVBB64:       # %bb.0:
 ; CHECK-ZVBB64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB64-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB64-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB64-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB64-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB64-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
@@ -93,18 +91,18 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_sext(<vscale x 2 x i32> %a, i32
 ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -118,18 +116,18 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_zext(<vscale x 2 x i32> %a, i32
 ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -153,8 +151,8 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_sext(<vscale x 2 x i32> %a, i16
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -178,8 +176,8 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_zext(<vscale x 2 x i32> %a, i16
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -203,8 +201,8 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_sext(<vscale x 2 x i32> %a, i8 %b
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -228,8 +226,8 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -243,16 +241,16 @@ define <vscale x 2 x i64> @vwsll_vi_nxv2i64(<vscale x 2 x i32> %a, <vscale x 2 x
 ; CHECK-LABEL: vwsll_vi_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0, v0.t
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vi v8, v9, 2, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
   %z = call <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> splat (i64 2), <vscale x 2 x i1> %m, i32 %vl)
@@ -270,16 +268,15 @@ define <vscale x 4 x i32> @vwsll_vv_nxv4i32_sext(<vscale x 4 x i16> %a, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vsext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv4i32_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
   %y = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -292,16 +289,15 @@ define <vscale x 4 x i32> @vwsll_vv_nxv4i32_zext(<vscale x 4 x i16> %a, <vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vzext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv4i32_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
   %y = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -327,15 +323,15 @@ define <vscale x 4 x i32> @vwsll_vx_i64_nxv4i32(<vscale x 4 x i16> %a, i64 %b, <
 ; CHECK-ZVBB32-LABEL: vwsll_vx_i64_nxv4i32:
 ; CHECK-ZVBB32:       # %bb.0:
 ; CHECK-ZVBB32-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
-; CHECK-ZVBB32-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB32-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB32-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB32-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB32-NEXT:    ret
 ;
 ; CHECK-ZVBB64-LABEL: vwsll_vx_i64_nxv4i32:
 ; CHECK-ZVBB64:       # %bb.0:
 ; CHECK-ZVBB64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-ZVBB64-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB64-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB64-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB64-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB64-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -356,8 +352,8 @@ define <vscale x 4 x i32> @vwsll_vx_i32_nxv4i32(<vscale x 4 x i16> %a, i32 %b, <
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv4i32:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -370,18 +366,18 @@ define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_sext(<vscale x 4 x i16> %a, i16
 ; CHECK-LABEL: vwsll_vx_i16_nxv4i32_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv4i32_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -395,18 +391,18 @@ define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_zext(<vscale x 4 x i16> %a, i16
 ; CHECK-LABEL: vwsll_vx_i16_nxv4i32_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv4i32_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -430,8 +426,8 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_sext(<vscale x 4 x i16> %a, i8 %b
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv4i32_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -455,8 +451,8 @@ define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_zext(<vscale x 4 x i16> %a, i8 %b
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv4i32_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -470,16 +466,16 @@ define <vscale x 4 x i32> @vwsll_vi_nxv4i32(<vscale x 4 x i16> %a, <vscale x 4 x
 ; CHECK-LABEL: vwsll_vi_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0, v0.t
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv4i32:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vi v8, v9, 2, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
   %z = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> splat (i32 2), <vscale x 4 x i1> %m, i32 %vl)
@@ -498,16 +494,15 @@ define <vscale x 8 x i16> @vwsll_vv_nxv8i16_sext(<vscale x 8 x i8> %a, <vscale x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vsext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv8i16_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
   %y = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -520,16 +515,15 @@ define <vscale x 8 x i16> @vwsll_vv_nxv8i16_zext(<vscale x 8 x i8> %a, <vscale x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v12, v9
-; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vzext.vf2 v8, v9
+; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vv_nxv8i16_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v9
-; CHECK-ZVBB-NEXT:    vmv1r.v v11, v8
-; CHECK-ZVBB-NEXT:    vwsll.vv v8, v11, v10, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vv v8, v10, v9, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
   %y = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -555,15 +549,15 @@ define <vscale x 8 x i16> @vwsll_vx_i64_nxv8i16(<vscale x 8 x i8> %a, i64 %b, <v
 ; CHECK-ZVBB32-LABEL: vwsll_vx_i64_nxv8i16:
 ; CHECK-ZVBB32:       # %bb.0:
 ; CHECK-ZVBB32-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
-; CHECK-ZVBB32-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB32-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB32-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB32-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB32-NEXT:    ret
 ;
 ; CHECK-ZVBB64-LABEL: vwsll_vx_i64_nxv8i16:
 ; CHECK-ZVBB64:       # %bb.0:
 ; CHECK-ZVBB64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-ZVBB64-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB64-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB64-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB64-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB64-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -584,8 +578,8 @@ define <vscale x 8 x i16> @vwsll_vx_i32_nxv8i16(<vscale x 8 x i8> %a, i32 %b, <v
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv8i16:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -606,8 +600,8 @@ define <vscale x 8 x i16> @vwsll_vx_i16_nxv8i16(<vscale x 8 x i8> %a, i16 %b, <v
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv8i16:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -620,18 +614,18 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_sext(<vscale x 8 x i8> %a, i8 %b,
 ; CHECK-LABEL: vwsll_vx_i8_nxv8i16_sext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv8i16_sext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -645,18 +639,18 @@ define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_zext(<vscale x 8 x i8> %a, i8 %b,
 ; CHECK-LABEL: vwsll_vx_i8_nxv8i16_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv8i16_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v9, a0, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -670,16 +664,16 @@ define <vscale x 8 x i16> @vwsll_vi_nxv8i16(<vscale x 8 x i8> %a, <vscale x 8 x
 ; CHECK-LABEL: vwsll_vi_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vwmulu.vx v8, v10, a0, v0.t
+; CHECK-NEXT:    vwmulu.vx v8, v9, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv8i16:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv1r.v v10, v8
-; CHECK-ZVBB-NEXT:    vwsll.vi v8, v10, 2, v0.t
+; CHECK-ZVBB-NEXT:    vmv1r.v v9, v8
+; CHECK-ZVBB-NEXT:    vwsll.vi v8, v9, 2, v0.t
 ; CHECK-ZVBB-NEXT:    ret
   %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
   %z = call <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> splat (i16 2), <vscale x 8 x i1> %m, i32 %vl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll.ll
index 14af93408d3d8..61a8820e263eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll.ll
@@ -158,9 +158,8 @@ define <vscale x 8 x i16> @intrinsic_vwsll_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vwsll_vv_nxv8i16_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwsll.nxv8i16.nxv8i8.nxv8i8(
@@ -207,9 +206,8 @@ define <vscale x 16 x i16> @intrinsic_vwsll_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vwsll_vv_nxv16i16_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwsll.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwsll.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwsll.nxv16i16.nxv16i8.nxv16i8(
@@ -256,9 +254,8 @@ define <vscale x 32 x i16> @intrinsic_vwsll_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
 ; CHECK-LABEL: intrinsic_vwsll_vv_nxv32i16_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwsll.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwsll.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwsll.nxv32i16.nxv32i8.nxv32i8(
@@ -401,9 +398,8 @@ define <vscale x 4 x i32> @intrinsic_vwsll_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwsll_vv_nxv4i32_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwsll.nxv4i32.nxv4i16.nxv4i16(
@@ -450,9 +446,8 @@ define <vscale x 8 x i32> @intrinsic_vwsll_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwsll_vv_nxv8i32_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwsll.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwsll.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwsll.nxv8i32.nxv8i16.nxv8i16(
@@ -499,9 +494,8 @@ define <vscale x 16 x i32> @intrinsic_vwsll_vv_nxv16i32_nxv16i16_nxv16i16(<vscal
 ; CHECK-LABEL: intrinsic_vwsll_vv_nxv16i32_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwsll.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwsll.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwsll.nxv16i32.nxv16i16.nxv16i16(
@@ -596,9 +590,8 @@ define <vscale x 2 x i64> @intrinsic_vwsll_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwsll_vv_nxv2i64_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwsll.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwsll.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwsll.nxv2i64.nxv2i32.nxv2i32(
@@ -645,9 +638,8 @@ define <vscale x 4 x i64> @intrinsic_vwsll_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwsll_vv_nxv4i64_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwsll.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwsll.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwsll.nxv4i64.nxv4i32.nxv4i32(
@@ -694,9 +686,8 @@ define <vscale x 8 x i64> @intrinsic_vwsll_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwsll_vv_nxv8i64_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwsll.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwsll.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwsll.nxv8i64.nxv8i32.nxv8i32(
@@ -887,8 +878,8 @@ define <vscale x 8 x i16> @intrinsic_vwsll_vx_nxv8i16_nxv8i8(<vscale x 8 x i8> %
 ; CHECK-LABEL: intrinsic_vwsll_vx_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwsll.nxv8i16.nxv8i8(
@@ -935,8 +926,8 @@ define <vscale x 16 x i16> @intrinsic_vwsll_vx_nxv16i16_nxv16i8(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vwsll_vx_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsll.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsll.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwsll.nxv16i16.nxv16i8(
@@ -983,8 +974,8 @@ define <vscale x 32 x i16> @intrinsic_vwsll_vx_nxv32i16_nxv32i8(<vscale x 32 x i
 ; CHECK-LABEL: intrinsic_vwsll_vx_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsll.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsll.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwsll.nxv32i16.nxv32i8(
@@ -1127,8 +1118,8 @@ define <vscale x 4 x i32> @intrinsic_vwsll_vx_nxv4i32_nxv4i16(<vscale x 4 x i16>
 ; CHECK-LABEL: intrinsic_vwsll_vx_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwsll.nxv4i32.nxv4i16(
@@ -1175,8 +1166,8 @@ define <vscale x 8 x i32> @intrinsic_vwsll_vx_nxv8i32_nxv8i16(<vscale x 8 x i16>
 ; CHECK-LABEL: intrinsic_vwsll_vx_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsll.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsll.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwsll.nxv8i32.nxv8i16(
@@ -1223,8 +1214,8 @@ define <vscale x 16 x i32> @intrinsic_vwsll_vx_nxv16i32_nxv16i16(<vscale x 16 x
 ; CHECK-LABEL: intrinsic_vwsll_vx_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsll.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsll.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwsll.nxv16i32.nxv16i16(
@@ -1319,8 +1310,8 @@ define <vscale x 2 x i64> @intrinsic_vwsll_vx_nxv2i64_nxv2i32(<vscale x 2 x i32>
 ; CHECK-LABEL: intrinsic_vwsll_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwsll.nxv2i64.nxv2i32(
@@ -1367,8 +1358,8 @@ define <vscale x 4 x i64> @intrinsic_vwsll_vx_nxv4i64_nxv4i32(<vscale x 4 x i32>
 ; CHECK-LABEL: intrinsic_vwsll_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsll.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsll.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwsll.nxv4i64.nxv4i32(
@@ -1415,8 +1406,8 @@ define <vscale x 8 x i64> @intrinsic_vwsll_vx_nxv8i64_nxv8i32(<vscale x 8 x i32>
 ; CHECK-LABEL: intrinsic_vwsll_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsll.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsll.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwsll.nxv8i64.nxv8i32(
@@ -1559,8 +1550,8 @@ define <vscale x 8 x i16> @intrinsic_vwsll_vi_nxv8i16_nxv8i8(<vscale x 8 x i8> %
 ; CHECK-LABEL: intrinsic_vwsll_vi_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsll.vi v8, v10, 1
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsll.vi v8, v9, 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwsll.nxv8i16.nxv8i8(
@@ -1593,8 +1584,8 @@ define <vscale x 16 x i16> @intrinsic_vwsll_vi_nxv16i16_nxv16i8(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vwsll_vi_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsll.vi v8, v12, 1
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsll.vi v8, v10, 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwsll.nxv16i16.nxv16i8(
@@ -1627,8 +1618,8 @@ define <vscale x 32 x i16> @intrinsic_vwsll_vi_nxv32i16_nxv32i8(<vscale x 32 x i
 ; CHECK-LABEL: intrinsic_vwsll_vi_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsll.vi v8, v16, 1
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsll.vi v8, v12, 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwsll.nxv32i16.nxv32i8(
@@ -1729,8 +1720,8 @@ define <vscale x 4 x i32> @intrinsic_vwsll_vi_nxv4i32_nxv4i16(<vscale x 4 x i16>
 ; CHECK-LABEL: intrinsic_vwsll_vi_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsll.vi v8, v10, 1
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsll.vi v8, v9, 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwsll.nxv4i32.nxv4i16(
@@ -1763,8 +1754,8 @@ define <vscale x 8 x i32> @intrinsic_vwsll_vi_nxv8i32_nxv8i16(<vscale x 8 x i16>
 ; CHECK-LABEL: intrinsic_vwsll_vi_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsll.vi v8, v12, 1
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsll.vi v8, v10, 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwsll.nxv8i32.nxv8i16(
@@ -1797,8 +1788,8 @@ define <vscale x 16 x i32> @intrinsic_vwsll_vi_nxv16i32_nxv16i16(<vscale x 16 x
 ; CHECK-LABEL: intrinsic_vwsll_vi_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsll.vi v8, v16, 1
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsll.vi v8, v12, 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwsll.nxv16i32.nxv16i16(
@@ -1865,8 +1856,8 @@ define <vscale x 2 x i64> @intrinsic_vwsll_vi_nxv2i64_nxv2i32(<vscale x 2 x i32>
 ; CHECK-LABEL: intrinsic_vwsll_vi_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsll.vi v8, v10, 1
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsll.vi v8, v9, 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwsll.nxv2i64.nxv2i32(
@@ -1899,8 +1890,8 @@ define <vscale x 4 x i64> @intrinsic_vwsll_vi_nxv4i64_nxv4i32(<vscale x 4 x i32>
 ; CHECK-LABEL: intrinsic_vwsll_vi_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsll.vi v8, v12, 1
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsll.vi v8, v10, 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwsll.nxv4i64.nxv4i32(
@@ -1933,8 +1924,8 @@ define <vscale x 8 x i64> @intrinsic_vwsll_vi_nxv8i64_nxv8i32(<vscale x 8 x i32>
 ; CHECK-LABEL: intrinsic_vwsll_vi_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsll.vi v8, v16, 1
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsll.vi v8, v12, 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwsll.nxv8i64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
index 22f9f97373415..fcb14ae04face 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
@@ -41,13 +41,12 @@ define <vscale x 8 x i64> @vwsubu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x
 define <vscale x 8 x i64> @vwsubu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
 ; CHECK-LABEL: vwsubu_vv_mask_v8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
 ; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmslt.vx v0, v8, a0
-; CHECK-NEXT:    vmv.v.i v12, 0
-; CHECK-NEXT:    vmerge.vvm v20, v12, v8, v0
-; CHECK-NEXT:    vwsubu.vv v8, v16, v20
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vwsubu.vv v8, v12, v16
 ; CHECK-NEXT:    ret
     %mask = icmp slt <vscale x 8 x i32> %x, splat (i32 42)
     %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
@@ -64,8 +63,8 @@ define <vscale x 8 x i64> @vwsub_wv_mask_v8i32_nonzero(<vscale x 8 x i32> %x, <v
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmslt.vx v0, v8, a0
 ; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmerge.vvm v24, v12, v8, v0
-; CHECK-NEXT:    vwsub.wv v8, v16, v24
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vwsub.wv v8, v16, v12
 ; CHECK-NEXT:    ret
     %mask = icmp slt <vscale x 8 x i32> %x, splat (i32 42)
     %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> splat (i32 1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
index 631c8c3ca48f9..2889d37ec2b05 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
@@ -110,9 +110,8 @@ define <vscale x 2 x i64> @vwsub_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vsc
 ; CHECK-LABEL: vwsub_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwsub.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwsub.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
   %vd = sext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -124,9 +123,8 @@ define <vscale x 2 x i64> @vwsubu_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vs
 ; CHECK-LABEL: vwsubu_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwsubu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwsubu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
@@ -138,8 +136,8 @@ define <vscale x 2 x i64> @vwsub_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32
 ; CHECK-LABEL: vwsub_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsub.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsub.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -153,8 +151,8 @@ define <vscale x 2 x i64> @vwsubu_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32
 ; CHECK-LABEL: vwsubu_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsubu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsubu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -216,9 +214,8 @@ define <vscale x 4 x i64> @vwsub_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vsc
 ; CHECK-LABEL: vwsub_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwsub.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwsub.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -230,9 +227,8 @@ define <vscale x 4 x i64> @vwsubu_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vs
 ; CHECK-LABEL: vwsubu_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwsubu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwsubu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
@@ -244,8 +240,8 @@ define <vscale x 4 x i64> @vwsub_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32
 ; CHECK-LABEL: vwsub_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsub.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsub.vx v8, v10, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -259,8 +255,8 @@ define <vscale x 4 x i64> @vwsubu_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32
 ; CHECK-LABEL: vwsubu_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsubu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsubu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -322,9 +318,8 @@ define <vscale x 8 x i64> @vwsub_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vsc
 ; CHECK-LABEL: vwsub_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwsub.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwsub.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -336,9 +331,8 @@ define <vscale x 8 x i64> @vwsubu_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vs
 ; CHECK-LABEL: vwsubu_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwsubu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwsubu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
@@ -350,8 +344,8 @@ define <vscale x 8 x i64> @vwsub_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32
 ; CHECK-LABEL: vwsub_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsub.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsub.vx v8, v12, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -365,8 +359,8 @@ define <vscale x 8 x i64> @vwsubu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32
 ; CHECK-LABEL: vwsubu_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsubu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsubu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -592,9 +586,9 @@ define <vscale x 2 x i64> @vwsubu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 ; CHECK-LABEL: vwsubu_vx_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vwsubu.vx v10, v8, a0
+; CHECK-NEXT:    vwsubu.vx v9, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    vsext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -664,9 +658,9 @@ define <vscale x 4 x i64> @vwsub_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vsc
 ; CHECK-LABEL: vwsub_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vsext.vf2 v14, v9
-; CHECK-NEXT:    vwsub.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vwsub.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -678,9 +672,9 @@ define <vscale x 4 x i64> @vwsubu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vs
 ; CHECK-LABEL: vwsubu_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vwsubu.vv v12, v8, v9
+; CHECK-NEXT:    vwsubu.vv v10, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -694,9 +688,9 @@ define <vscale x 4 x i64> @vwsub_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v8
-; CHECK-NEXT:    vsext.vf2 v14, v9
-; CHECK-NEXT:    vwsub.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vwsub.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -710,9 +704,9 @@ define <vscale x 4 x i64> @vwsubu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 ; CHECK-LABEL: vwsubu_vx_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vwsubu.vx v12, v8, a0
+; CHECK-NEXT:    vwsubu.vx v10, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    vsext.vf2 v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -750,9 +744,9 @@ define <vscale x 4 x i64> @vwsub_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
 ; CHECK-LABEL: vwsub_wx_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v14, a0
+; CHECK-NEXT:    vmv.v.x v13, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf2 v12, v14
+; CHECK-NEXT:    vsext.vf2 v12, v13
 ; CHECK-NEXT:    vwsub.wv v8, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
@@ -766,9 +760,9 @@ define <vscale x 4 x i64> @vwsubu_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
 ; CHECK-LABEL: vwsubu_wx_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v14, a0
+; CHECK-NEXT:    vmv.v.x v13, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vzext.vf2 v12, v14
+; CHECK-NEXT:    vzext.vf2 v12, v13
 ; CHECK-NEXT:    vwsubu.wv v8, v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
@@ -782,9 +776,9 @@ define <vscale x 8 x i64> @vwsub_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vsc
 ; CHECK-LABEL: vwsub_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vsext.vf2 v20, v10
-; CHECK-NEXT:    vwsub.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v16, v10
+; CHECK-NEXT:    vwsub.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -796,9 +790,9 @@ define <vscale x 8 x i64> @vwsubu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vs
 ; CHECK-LABEL: vwsubu_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vwsubu.vv v16, v8, v10
+; CHECK-NEXT:    vwsubu.vv v12, v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    vsext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -812,9 +806,9 @@ define <vscale x 8 x i64> @vwsub_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vsext.vf2 v20, v10
-; CHECK-NEXT:    vwsub.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v16, v10
+; CHECK-NEXT:    vwsub.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -828,9 +822,9 @@ define <vscale x 8 x i64> @vwsubu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 ; CHECK-LABEL: vwsubu_vx_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vwsubu.vx v16, v8, a0
+; CHECK-NEXT:    vwsubu.vx v12, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    vsext.vf2 v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -868,9 +862,9 @@ define <vscale x 8 x i64> @vwsub_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
 ; CHECK-LABEL: vwsub_wx_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v20, a0
+; CHECK-NEXT:    vmv.v.x v18, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v20
+; CHECK-NEXT:    vsext.vf2 v16, v18
 ; CHECK-NEXT:    vwsub.wv v8, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
@@ -884,9 +878,9 @@ define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
 ; CHECK-LABEL: vwsubu_wx_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v20, a0
+; CHECK-NEXT:    vmv.v.x v18, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v20
+; CHECK-NEXT:    vzext.vf2 v16, v18
 ; CHECK-NEXT:    vwsubu.wv v8, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
@@ -1136,9 +1130,9 @@ define <vscale x 4 x i64> @vwsub_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscal
 ; CHECK-LABEL: vwsub_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v14, v9
-; CHECK-NEXT:    vwsub.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vwsub.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -1150,9 +1144,9 @@ define <vscale x 4 x i64> @vwsubu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vsca
 ; CHECK-LABEL: vwsubu_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vwsubu.vv v12, v8, v9
+; CHECK-NEXT:    vwsubu.vv v11, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    vsext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -1166,9 +1160,9 @@ define <vscale x 4 x i64> @vwsub_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v14, v9
-; CHECK-NEXT:    vwsub.vv v8, v12, v14
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vwsub.vv v8, v10, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1182,9 +1176,9 @@ define <vscale x 4 x i64> @vwsubu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b
 ; CHECK-LABEL: vwsubu_vx_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vwsubu.vx v12, v8, a0
+; CHECK-NEXT:    vwsubu.vx v11, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    vsext.vf4 v8, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1254,9 +1248,9 @@ define <vscale x 8 x i64> @vwsub_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscal
 ; CHECK-LABEL: vwsub_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v20, v9
-; CHECK-NEXT:    vwsub.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vwsub.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1268,9 +1262,9 @@ define <vscale x 8 x i64> @vwsubu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vsca
 ; CHECK-LABEL: vwsubu_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vwsubu.vv v16, v8, v9
+; CHECK-NEXT:    vwsubu.vv v14, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    vsext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1284,9 +1278,9 @@ define <vscale x 8 x i64> @vwsub_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v20, v9
-; CHECK-NEXT:    vwsub.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vwsub.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1300,9 +1294,9 @@ define <vscale x 8 x i64> @vwsubu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b
 ; CHECK-LABEL: vwsubu_vx_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vwsubu.vx v16, v8, a0
+; CHECK-NEXT:    vwsubu.vx v14, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    vsext.vf4 v8, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1340,9 +1334,9 @@ define <vscale x 8 x i64> @vwsub_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %b
 ; CHECK-LABEL: vwsub_wx_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v20, a0
+; CHECK-NEXT:    vmv.v.x v19, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v20
+; CHECK-NEXT:    vsext.vf4 v16, v19
 ; CHECK-NEXT:    vwsub.wv v8, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
@@ -1356,9 +1350,9 @@ define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %
 ; CHECK-LABEL: vwsubu_wx_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v20, a0
+; CHECK-NEXT:    vmv.v.x v19, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v16, v20
+; CHECK-NEXT:    vzext.vf4 v16, v19
 ; CHECK-NEXT:    vwsubu.wv v8, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.ll
index 24e03df413bfd..b10217d5c59e6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.ll
@@ -158,9 +158,8 @@ define <vscale x 8 x i16> @intrinsic_vwsub_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vwsub_vv_nxv8i16_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwsub.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwsub.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwsub.nxv8i16.nxv8i8.nxv8i8(
@@ -207,9 +206,8 @@ define <vscale x 16 x i16> @intrinsic_vwsub_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vwsub_vv_nxv16i16_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwsub.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwsub.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwsub.nxv16i16.nxv16i8.nxv16i8(
@@ -256,9 +254,8 @@ define <vscale x 32 x i16> @intrinsic_vwsub_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
 ; CHECK-LABEL: intrinsic_vwsub_vv_nxv32i16_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwsub.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwsub.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwsub.nxv32i16.nxv32i8.nxv32i8(
@@ -401,9 +398,8 @@ define <vscale x 4 x i32> @intrinsic_vwsub_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwsub_vv_nxv4i32_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwsub.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwsub.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwsub.nxv4i32.nxv4i16.nxv4i16(
@@ -450,9 +446,8 @@ define <vscale x 8 x i32> @intrinsic_vwsub_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwsub_vv_nxv8i32_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwsub.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwsub.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwsub.nxv8i32.nxv8i16.nxv8i16(
@@ -499,9 +494,8 @@ define <vscale x 16 x i32> @intrinsic_vwsub_vv_nxv16i32_nxv16i16_nxv16i16(<vscal
 ; CHECK-LABEL: intrinsic_vwsub_vv_nxv16i32_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwsub.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwsub.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwsub.nxv16i32.nxv16i16.nxv16i16(
@@ -596,9 +590,8 @@ define <vscale x 2 x i64> @intrinsic_vwsub_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwsub_vv_nxv2i64_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwsub.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwsub.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.nxv2i64.nxv2i32.nxv2i32(
@@ -645,9 +638,8 @@ define <vscale x 4 x i64> @intrinsic_vwsub_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwsub_vv_nxv4i64_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwsub.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwsub.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwsub.nxv4i64.nxv4i32.nxv4i32(
@@ -694,9 +686,8 @@ define <vscale x 8 x i64> @intrinsic_vwsub_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwsub_vv_nxv8i64_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwsub.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwsub.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwsub.nxv8i64.nxv8i32.nxv8i32(
@@ -887,8 +878,8 @@ define <vscale x 8 x i16> @intrinsic_vwsub_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x i8
 ; CHECK-LABEL: intrinsic_vwsub_vx_nxv8i16_nxv8i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsub.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsub.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwsub.nxv8i16.nxv8i8.i8(
@@ -935,8 +926,8 @@ define <vscale x 16 x i16> @intrinsic_vwsub_vx_nxv16i16_nxv16i8_i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vwsub_vx_nxv16i16_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsub.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsub.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwsub.nxv16i16.nxv16i8.i8(
@@ -983,8 +974,8 @@ define <vscale x 32 x i16> @intrinsic_vwsub_vx_nxv32i16_nxv32i8_i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vwsub_vx_nxv32i16_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsub.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsub.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwsub.nxv32i16.nxv32i8.i8(
@@ -1127,8 +1118,8 @@ define <vscale x 4 x i32> @intrinsic_vwsub_vx_nxv4i32_nxv4i16_i16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vwsub_vx_nxv4i32_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsub.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsub.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwsub.nxv4i32.nxv4i16.i16(
@@ -1175,8 +1166,8 @@ define <vscale x 8 x i32> @intrinsic_vwsub_vx_nxv8i32_nxv8i16_i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwsub_vx_nxv8i32_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsub.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsub.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwsub.nxv8i32.nxv8i16.i16(
@@ -1223,8 +1214,8 @@ define <vscale x 16 x i32> @intrinsic_vwsub_vx_nxv16i32_nxv16i16_i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vwsub_vx_nxv16i32_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsub.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsub.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwsub.nxv16i32.nxv16i16.i16(
@@ -1319,8 +1310,8 @@ define <vscale x 2 x i64> @intrinsic_vwsub_vx_nxv2i64_nxv2i32_i32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vwsub_vx_nxv2i64_nxv2i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsub.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsub.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.nxv2i64.nxv2i32.i32(
@@ -1367,8 +1358,8 @@ define <vscale x 4 x i64> @intrinsic_vwsub_vx_nxv4i64_nxv4i32_i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vwsub_vx_nxv4i64_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsub.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsub.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwsub.nxv4i64.nxv4i32.i32(
@@ -1415,8 +1406,8 @@ define <vscale x 8 x i64> @intrinsic_vwsub_vx_nxv8i64_nxv8i32_i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwsub_vx_nxv8i64_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsub.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsub.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwsub.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll
index 1bc6103d298b2..5c114a56017ab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll
@@ -1982,8 +1982,8 @@ define <vscale x 8 x i16> @intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vs
 ; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vwsub.wv v8, v10, v12
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsub.wv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8(
@@ -1999,8 +1999,8 @@ define <vscale x 16 x i16> @intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8
 ; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vwsub.wv v8, v12, v16
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsub.wv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8(
@@ -2016,8 +2016,8 @@ define <vscale x 32 x i16> @intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8
 ; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v24, v8
-; CHECK-NEXT:    vwsub.wv v8, v16, v24
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsub.wv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8(
@@ -2067,8 +2067,8 @@ define <vscale x 4 x i32> @intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<v
 ; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vwsub.wv v8, v10, v12
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsub.wv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16(
@@ -2084,8 +2084,8 @@ define <vscale x 8 x i32> @intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<v
 ; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vwsub.wv v8, v12, v16
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsub.wv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16(
@@ -2118,8 +2118,8 @@ define <vscale x 2 x i64> @intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<v
 ; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vwsub.wv v8, v10, v12
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsub.wv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(
@@ -2135,8 +2135,8 @@ define <vscale x 4 x i64> @intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<v
 ; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vwsub.wv v8, v12, v16
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsub.wv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32(
@@ -2152,8 +2152,8 @@ define <vscale x 8 x i64> @intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<v
 ; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v24, v8
-; CHECK-NEXT:    vwsub.wv v8, v16, v24
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsub.wv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.ll
index d849b0aaffc63..8d1e61741f319 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.ll
@@ -158,9 +158,8 @@ define <vscale x 8 x i16> @intrinsic_vwsubu_vv_nxv8i16_nxv8i8_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vwsubu_vv_nxv8i16_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwsubu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwsubu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.nxv8i16.nxv8i8.nxv8i8(
@@ -207,9 +206,8 @@ define <vscale x 16 x i16> @intrinsic_vwsubu_vv_nxv16i16_nxv16i8_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vwsubu_vv_nxv16i16_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwsubu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwsubu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.nxv16i16.nxv16i8.nxv16i8(
@@ -256,9 +254,8 @@ define <vscale x 32 x i16> @intrinsic_vwsubu_vv_nxv32i16_nxv32i8_nxv32i8(<vscale
 ; CHECK-LABEL: intrinsic_vwsubu_vv_nxv32i16_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwsubu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwsubu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.nxv32i16.nxv32i8.nxv32i8(
@@ -401,9 +398,8 @@ define <vscale x 4 x i32> @intrinsic_vwsubu_vv_nxv4i32_nxv4i16_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwsubu_vv_nxv4i32_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwsubu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwsubu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.nxv4i32.nxv4i16.nxv4i16(
@@ -450,9 +446,8 @@ define <vscale x 8 x i32> @intrinsic_vwsubu_vv_nxv8i32_nxv8i16_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwsubu_vv_nxv8i32_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwsubu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwsubu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.nxv8i32.nxv8i16.nxv8i16(
@@ -499,9 +494,8 @@ define <vscale x 16 x i32> @intrinsic_vwsubu_vv_nxv16i32_nxv16i16_nxv16i16(<vsca
 ; CHECK-LABEL: intrinsic_vwsubu_vv_nxv16i32_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwsubu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwsubu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwsubu.nxv16i32.nxv16i16.nxv16i16(
@@ -596,9 +590,8 @@ define <vscale x 2 x i64> @intrinsic_vwsubu_vv_nxv2i64_nxv2i32_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwsubu_vv_nxv2i64_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
-; CHECK-NEXT:    vmv1r.v v11, v8
-; CHECK-NEXT:    vwsubu.vv v8, v11, v10
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vwsubu.vv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.nxv2i64.nxv2i32.nxv2i32(
@@ -645,9 +638,8 @@ define <vscale x 4 x i64> @intrinsic_vwsubu_vv_nxv4i64_nxv4i32_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwsubu_vv_nxv4i64_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
-; CHECK-NEXT:    vmv2r.v v14, v8
-; CHECK-NEXT:    vwsubu.vv v8, v14, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vwsubu.vv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.nxv4i64.nxv4i32.nxv4i32(
@@ -694,9 +686,8 @@ define <vscale x 8 x i64> @intrinsic_vwsubu_vv_nxv8i64_nxv8i32_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vwsubu_vv_nxv8i64_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vwsubu.vv v8, v20, v16
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vwsubu.vv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.nxv8i64.nxv8i32.nxv8i32(
@@ -887,8 +878,8 @@ define <vscale x 8 x i16> @intrinsic_vwsubu_vx_nxv8i16_nxv8i8_i8(<vscale x 8 x i
 ; CHECK-LABEL: intrinsic_vwsubu_vx_nxv8i16_nxv8i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsubu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsubu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.nxv8i16.nxv8i8.i8(
@@ -935,8 +926,8 @@ define <vscale x 16 x i16> @intrinsic_vwsubu_vx_nxv16i16_nxv16i8_i8(<vscale x 16
 ; CHECK-LABEL: intrinsic_vwsubu_vx_nxv16i16_nxv16i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsubu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsubu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.nxv16i16.nxv16i8.i8(
@@ -983,8 +974,8 @@ define <vscale x 32 x i16> @intrinsic_vwsubu_vx_nxv32i16_nxv32i8_i8(<vscale x 32
 ; CHECK-LABEL: intrinsic_vwsubu_vx_nxv32i16_nxv32i8_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsubu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsubu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.nxv32i16.nxv32i8.i8(
@@ -1127,8 +1118,8 @@ define <vscale x 4 x i32> @intrinsic_vwsubu_vx_nxv4i32_nxv4i16_i16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vwsubu_vx_nxv4i32_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsubu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsubu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.nxv4i32.nxv4i16.i16(
@@ -1175,8 +1166,8 @@ define <vscale x 8 x i32> @intrinsic_vwsubu_vx_nxv8i32_nxv8i16_i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwsubu_vx_nxv8i32_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsubu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsubu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.nxv8i32.nxv8i16.i16(
@@ -1223,8 +1214,8 @@ define <vscale x 16 x i32> @intrinsic_vwsubu_vx_nxv16i32_nxv16i16_i16(<vscale x
 ; CHECK-LABEL: intrinsic_vwsubu_vx_nxv16i32_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsubu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsubu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vwsubu.nxv16i32.nxv16i16.i16(
@@ -1319,8 +1310,8 @@ define <vscale x 2 x i64> @intrinsic_vwsubu_vx_nxv2i64_nxv2i32_i32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vwsubu_vx_nxv2i64_nxv2i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vwsubu.vx v8, v10, a0
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsubu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.nxv2i64.nxv2i32.i32(
@@ -1367,8 +1358,8 @@ define <vscale x 4 x i64> @intrinsic_vwsubu_vx_nxv4i64_nxv4i32_i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vwsubu_vx_nxv4i64_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vwsubu.vx v8, v12, a0
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsubu.vx v8, v10, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.nxv4i64.nxv4i32.i32(
@@ -1415,8 +1406,8 @@ define <vscale x 8 x i64> @intrinsic_vwsubu_vx_nxv8i64_nxv8i32_i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vwsubu_vx_nxv8i64_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vwsubu.vx v8, v16, a0
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsubu.vx v8, v12, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.nxv8i64.nxv8i32.i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll
index 01b3287e4ed31..c1905d124f801 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll
@@ -1982,8 +1982,8 @@ define <vscale x 8 x i16> @intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<v
 ; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vwsubu.wv v8, v10, v12
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsubu.wv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8(
@@ -1999,8 +1999,8 @@ define <vscale x 16 x i16> @intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i
 ; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vwsubu.wv v8, v12, v16
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsubu.wv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8(
@@ -2016,8 +2016,8 @@ define <vscale x 32 x i16> @intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i
 ; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v24, v8
-; CHECK-NEXT:    vwsubu.wv v8, v16, v24
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsubu.wv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8(
@@ -2067,8 +2067,8 @@ define <vscale x 4 x i32> @intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<
 ; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vwsubu.wv v8, v10, v12
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsubu.wv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16(
@@ -2084,8 +2084,8 @@ define <vscale x 8 x i32> @intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<
 ; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vwsubu.wv v8, v12, v16
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsubu.wv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16(
@@ -2118,8 +2118,8 @@ define <vscale x 2 x i64> @intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<
 ; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vwsubu.wv v8, v10, v12
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vwsubu.wv v8, v10, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32(
@@ -2135,8 +2135,8 @@ define <vscale x 4 x i64> @intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<
 ; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vwsubu.wv v8, v12, v16
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vwsubu.wv v8, v12, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32(
@@ -2152,8 +2152,8 @@ define <vscale x 8 x i64> @intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<
 ; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv4r.v v24, v8
-; CHECK-NEXT:    vwsubu.wv v8, v16, v24
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vwsubu.wv v8, v16, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll
index 34337b1af1df5..e871e16a98c1c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll
@@ -128,8 +128,8 @@ define <vscale x 2 x i64> @vzext_nxv2i32_nxv2i64(<vscale x 2 x i32> %a, <vscale
 ; CHECK-LABEL: vzext_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vzext.vf2 v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 %vl)
   ret <vscale x 2 x i64> %v
@@ -139,8 +139,8 @@ define <vscale x 2 x i64> @vzext_nxv2i32_nxv2i64_unmasked(<vscale x 2 x i32> %a,
 ; CHECK-LABEL: vzext_nxv2i32_nxv2i64_unmasked:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
   ret <vscale x 2 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext.ll b/llvm/test/CodeGen/RISCV/rvv/vzext.ll
index a883041000a9a..19e52ab41f841 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vzext.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vzext.ll
@@ -145,8 +145,8 @@ define <vscale x 8 x i64> @intrinsic_vzext_vf8_nxv8i64(<vscale x 8 x i8> %0, iXL
 ; CHECK-LABEL: intrinsic_vzext_vf8_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v16, v8
-; CHECK-NEXT:    vzext.vf8 v8, v16
+; CHECK-NEXT:    vmv1r.v v15, v8
+; CHECK-NEXT:    vzext.vf8 v8, v15
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vzext.nxv8i64.nxv8i8(
@@ -277,8 +277,8 @@ define <vscale x 4 x i64> @intrinsic_vzext_vf4_nxv4i64(<vscale x 4 x i16> %0, iX
 ; CHECK-LABEL: intrinsic_vzext_vf4_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i64.nxv4i16(
@@ -321,8 +321,8 @@ define <vscale x 8 x i64> @intrinsic_vzext_vf4_nxv8i64(<vscale x 8 x i16> %0, iX
 ; CHECK-LABEL: intrinsic_vzext_vf4_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vzext.vf4 v8, v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vzext.vf4 v8, v14
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vzext.nxv8i64.nxv8i16(
@@ -497,8 +497,8 @@ define <vscale x 8 x i32> @intrinsic_vzext_vf4_nxv8i32(<vscale x 8 x i8> %0, iXL
 ; CHECK-LABEL: intrinsic_vzext_vf4_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vzext.vf4 v8, v11
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vzext.nxv8i32.nxv8i8(
@@ -541,8 +541,8 @@ define <vscale x 16 x i32> @intrinsic_vzext_vf4_nxv16i32(<vscale x 16 x i8> %0,
 ; CHECK-LABEL: intrinsic_vzext_vf4_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv2r.v v16, v8
-; CHECK-NEXT:    vzext.vf4 v8, v16
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    vzext.vf4 v8, v14
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vzext.nxv16i32.nxv16i8(
@@ -629,8 +629,8 @@ define <vscale x 2 x i64> @intrinsic_vzext_vf2_nxv2i64(<vscale x 2 x i32> %0, iX
 ; CHECK-LABEL: intrinsic_vzext_vf2_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vzext.nxv2i64.nxv2i32(
@@ -673,8 +673,8 @@ define <vscale x 4 x i64> @intrinsic_vzext_vf2_nxv4i64(<vscale x 4 x i32> %0, iX
 ; CHECK-LABEL: intrinsic_vzext_vf2_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i64.nxv4i32(
@@ -717,8 +717,8 @@ define <vscale x 8 x i64> @intrinsic_vzext_vf2_nxv8i64(<vscale x 8 x i32> %0, iX
 ; CHECK-LABEL: intrinsic_vzext_vf2_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vzext.nxv8i64.nxv8i32(
@@ -849,8 +849,8 @@ define <vscale x 4 x i32> @intrinsic_vzext_vf2_nxv4i32(<vscale x 4 x i16> %0, iX
 ; CHECK-LABEL: intrinsic_vzext_vf2_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i16(
@@ -893,8 +893,8 @@ define <vscale x 8 x i32> @intrinsic_vzext_vf2_nxv8i32(<vscale x 8 x i16> %0, iX
 ; CHECK-LABEL: intrinsic_vzext_vf2_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vzext.nxv8i32.nxv8i16(
@@ -937,8 +937,8 @@ define <vscale x 16 x i32> @intrinsic_vzext_vf2_nxv16i32(<vscale x 16 x i16> %0,
 ; CHECK-LABEL: intrinsic_vzext_vf2_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vzext.nxv16i32.nxv16i16(
@@ -1113,8 +1113,8 @@ define <vscale x 8 x i16> @intrinsic_vzext_vf2_nxv8i16(<vscale x 8 x i8> %0, iXL
 ; CHECK-LABEL: intrinsic_vzext_vf2_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
-; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vzext.nxv8i16.nxv8i8(
@@ -1157,8 +1157,8 @@ define <vscale x 16 x i16> @intrinsic_vzext_vf2_nxv16i16(<vscale x 16 x i8> %0,
 ; CHECK-LABEL: intrinsic_vzext_vf2_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    vmv2r.v v10, v8
+; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vzext.nxv16i16.nxv16i8(
@@ -1201,8 +1201,8 @@ define <vscale x 32 x i16> @intrinsic_vzext_vf2_nxv32i16(<vscale x 32 x i8> %0,
 ; CHECK-LABEL: intrinsic_vzext_vf2_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vzext.vf2 v8, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vzext.nxv32i16.nxv32i8(
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvbb-demanded-bits.ll b/llvm/test/CodeGen/RISCV/rvv/zvbb-demanded-bits.ll
index 107d59670c4c9..deb5235305243 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvbb-demanded-bits.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/zvbb-demanded-bits.ll
@@ -34,9 +34,9 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b
 ; CHECK-LABEL: vwsll_vx_i8_nxv2i64_zext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    addiw a0, a0, 1
-; CHECK-NEXT:    vwsll.vx v8, v10, a0
+; CHECK-NEXT:    vwsll.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %s = add i8 %b, 1
   %head = insertelement <vscale x 2 x i8> poison, i8 %s, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll
index 87a984bda1fee..59a8fb173b964 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll
@@ -8,9 +8,9 @@ define i32 @vqdot_vv(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; NODOT-LABEL: vqdot_vv:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; NODOT-NEXT:    vsext.vf2 v16, v8
-; NODOT-NEXT:    vsext.vf2 v20, v10
-; NODOT-NEXT:    vwmul.vv v8, v16, v20
+; NODOT-NEXT:    vsext.vf2 v12, v8
+; NODOT-NEXT:    vsext.vf2 v16, v10
+; NODOT-NEXT:    vwmul.vv v8, v12, v16
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; NODOT-NEXT:    vmv.s.x v16, zero
 ; NODOT-NEXT:    vredsum.vs v8, v8, v16
@@ -38,9 +38,9 @@ define i32 @vqdot_vx_constant(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: vqdot_vx_constant:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vsext.vf2 v12, v8
 ; CHECK-NEXT:    li a0, 23
-; CHECK-NEXT:    vwmul.vx v8, v16, a0
+; CHECK-NEXT:    vwmul.vx v8, v12, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv.s.x v16, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v16
@@ -57,9 +57,9 @@ define i32 @vqdot_vx_constant_swapped(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: vqdot_vx_constant_swapped:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vsext.vf2 v12, v8
 ; CHECK-NEXT:    li a0, 23
-; CHECK-NEXT:    vwmul.vx v8, v16, a0
+; CHECK-NEXT:    vwmul.vx v8, v12, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv.s.x v16, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v16
@@ -106,9 +106,9 @@ define i32 @vqdotu_vx_constant(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: vqdotu_vx_constant:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8
+; CHECK-NEXT:    vzext.vf2 v12, v8
 ; CHECK-NEXT:    li a0, 123
-; CHECK-NEXT:    vwmulu.vx v8, v16, a0
+; CHECK-NEXT:    vwmulu.vx v8, v12, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv.s.x v16, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v16
@@ -125,9 +125,9 @@ define i32 @vqdotsu_vv(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; NODOT-LABEL: vqdotsu_vv:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; NODOT-NEXT:    vsext.vf2 v16, v8
-; NODOT-NEXT:    vzext.vf2 v20, v10
-; NODOT-NEXT:    vwmulsu.vv v8, v16, v20
+; NODOT-NEXT:    vsext.vf2 v12, v8
+; NODOT-NEXT:    vzext.vf2 v16, v10
+; NODOT-NEXT:    vwmulsu.vv v8, v12, v16
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; NODOT-NEXT:    vmv.s.x v16, zero
 ; NODOT-NEXT:    vredsum.vs v8, v8, v16
@@ -155,9 +155,9 @@ define i32 @vqdotsu_vv_swapped(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; NODOT-LABEL: vqdotsu_vv_swapped:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; NODOT-NEXT:    vsext.vf2 v16, v8
-; NODOT-NEXT:    vzext.vf2 v20, v10
-; NODOT-NEXT:    vwmulsu.vv v8, v16, v20
+; NODOT-NEXT:    vsext.vf2 v12, v8
+; NODOT-NEXT:    vzext.vf2 v16, v10
+; NODOT-NEXT:    vwmulsu.vv v8, v12, v16
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; NODOT-NEXT:    vmv.s.x v16, zero
 ; NODOT-NEXT:    vredsum.vs v8, v8, v16
@@ -185,9 +185,9 @@ define i32 @vdotqsu_vx_constant(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: vdotqsu_vx_constant:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vsext.vf2 v12, v8
 ; CHECK-NEXT:    li a0, 123
-; CHECK-NEXT:    vwmul.vx v8, v16, a0
+; CHECK-NEXT:    vwmul.vx v8, v12, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv.s.x v16, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v16
@@ -204,10 +204,10 @@ define i32 @vdotqus_vx_constant(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: vdotqus_vx_constant:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vzext.vf2 v16, v8
+; CHECK-NEXT:    vzext.vf2 v12, v8
 ; CHECK-NEXT:    li a0, -23
-; CHECK-NEXT:    vmv.v.x v20, a0
-; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vwmulsu.vv v8, v16, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv.s.x v16, zero
 ; CHECK-NEXT:    vredsum.vs v8, v8, v16
@@ -279,8 +279,8 @@ define i32 @vqdot_vv_accum(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vsext.vf2 v24, v10
-; NODOT-NEXT:    vwmacc.vv v16, v12, v24
+; NODOT-NEXT:    vsext.vf2 v8, v10
+; NODOT-NEXT:    vwmacc.vv v16, v12, v8
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; NODOT-NEXT:    vmv.s.x v8, zero
 ; NODOT-NEXT:    vredsum.vs v8, v16, v8
@@ -341,8 +341,8 @@ define i32 @vqdotsu_vv_accum(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vsca
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vzext.vf2 v24, v10
-; NODOT-NEXT:    vwmaccsu.vv v16, v12, v24
+; NODOT-NEXT:    vzext.vf2 v8, v10
+; NODOT-NEXT:    vwmaccsu.vv v16, v12, v8
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; NODOT-NEXT:    vmv.s.x v8, zero
 ; NODOT-NEXT:    vredsum.vs v8, v16, v8
@@ -371,9 +371,9 @@ define i32 @vqdot_vv_scalar_add(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i3
 ; NODOT-LABEL: vqdot_vv_scalar_add:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; NODOT-NEXT:    vsext.vf2 v16, v8
-; NODOT-NEXT:    vsext.vf2 v20, v10
-; NODOT-NEXT:    vwmul.vv v8, v16, v20
+; NODOT-NEXT:    vsext.vf2 v12, v8
+; NODOT-NEXT:    vsext.vf2 v16, v10
+; NODOT-NEXT:    vwmul.vv v8, v12, v16
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; NODOT-NEXT:    vmv.s.x v16, a0
 ; NODOT-NEXT:    vredsum.vs v8, v8, v16
@@ -433,9 +433,9 @@ define i32 @vqdotsu_vv_scalar_add(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
 ; NODOT-LABEL: vqdotsu_vv_scalar_add:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; NODOT-NEXT:    vsext.vf2 v16, v8
-; NODOT-NEXT:    vzext.vf2 v20, v10
-; NODOT-NEXT:    vwmulsu.vv v8, v16, v20
+; NODOT-NEXT:    vsext.vf2 v12, v8
+; NODOT-NEXT:    vzext.vf2 v16, v10
+; NODOT-NEXT:    vwmulsu.vv v8, v12, v16
 ; NODOT-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; NODOT-NEXT:    vmv.s.x v16, a0
 ; NODOT-NEXT:    vredsum.vs v8, v8, v16
@@ -536,9 +536,9 @@ define <vscale x 2 x i32> @partial_reduce_m1(<vscale x 8 x i8> %a, <vscale x 8 x
 ; NODOT-LABEL: partial_reduce_m1:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; NODOT-NEXT:    vsext.vf2 v12, v8
-; NODOT-NEXT:    vsext.vf2 v14, v9
-; NODOT-NEXT:    vwmul.vv v8, v12, v14
+; NODOT-NEXT:    vsext.vf2 v10, v8
+; NODOT-NEXT:    vsext.vf2 v12, v9
+; NODOT-NEXT:    vwmul.vv v8, v10, v12
 ; NODOT-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; NODOT-NEXT:    vadd.vv v8, v11, v8
 ; NODOT-NEXT:    vadd.vv v9, v9, v10
@@ -564,9 +564,9 @@ define <vscale x 4 x i32> @partial_reduce_m2(<vscale x 16 x i8> %a, <vscale x 16
 ; NODOT-LABEL: partial_reduce_m2:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; NODOT-NEXT:    vsext.vf2 v16, v8
-; NODOT-NEXT:    vsext.vf2 v20, v10
-; NODOT-NEXT:    vwmul.vv v8, v16, v20
+; NODOT-NEXT:    vsext.vf2 v12, v8
+; NODOT-NEXT:    vsext.vf2 v16, v10
+; NODOT-NEXT:    vwmul.vv v8, v12, v16
 ; NODOT-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; NODOT-NEXT:    vadd.vv v8, v14, v8
 ; NODOT-NEXT:    vadd.vv v10, v10, v12
@@ -592,12 +592,12 @@ define <vscale x 8 x i32> @partial_reduce_m4(<vscale x 32 x i8> %a, <vscale x 32
 ; NODOT-LABEL: partial_reduce_m4:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; NODOT-NEXT:    vsext.vf2 v24, v8
+; NODOT-NEXT:    vsext.vf2 v20, v8
 ; NODOT-NEXT:    vsext.vf2 v16, v10
-; NODOT-NEXT:    vsext.vf2 v28, v12
-; NODOT-NEXT:    vsext.vf2 v20, v14
-; NODOT-NEXT:    vwmul.vv v8, v16, v20
-; NODOT-NEXT:    vwmul.vv v16, v24, v28
+; NODOT-NEXT:    vsext.vf2 v24, v12
+; NODOT-NEXT:    vsext.vf2 v12, v14
+; NODOT-NEXT:    vwmul.vv v8, v16, v12
+; NODOT-NEXT:    vwmul.vv v16, v20, v24
 ; NODOT-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; NODOT-NEXT:    vadd.vv v16, v20, v16
 ; NODOT-NEXT:    vadd.vv v8, v12, v8
@@ -622,22 +622,13 @@ entry:
 define <vscale x 16 x i32> @partial_reduce_m8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b) {
 ; NODOT-LABEL: partial_reduce_m8:
 ; NODOT:       # %bb.0: # %entry
-; NODOT-NEXT:    addi sp, sp, -16
-; NODOT-NEXT:    .cfi_def_cfa_offset 16
-; NODOT-NEXT:    csrr a0, vlenb
-; NODOT-NEXT:    slli a0, a0, 2
-; NODOT-NEXT:    sub sp, sp, a0
-; NODOT-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
 ; NODOT-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; NODOT-NEXT:    vsext.vf2 v24, v10
-; NODOT-NEXT:    addi a0, sp, 16
-; NODOT-NEXT:    vs4r.v v24, (a0) # vscale x 32-byte Folded Spill
-; NODOT-NEXT:    vsext.vf2 v0, v8
+; NODOT-NEXT:    vsext.vf2 v4, v10
+; NODOT-NEXT:    vsext.vf2 v28, v8
 ; NODOT-NEXT:    vsext.vf2 v8, v18
-; NODOT-NEXT:    vsext.vf2 v4, v16
-; NODOT-NEXT:    vwmul.vv v24, v0, v4
-; NODOT-NEXT:    vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
-; NODOT-NEXT:    vwmacc.vv v24, v16, v8
+; NODOT-NEXT:    vsext.vf2 v0, v16
+; NODOT-NEXT:    vwmul.vv v24, v28, v0
+; NODOT-NEXT:    vwmacc.vv v24, v4, v8
 ; NODOT-NEXT:    vsext.vf2 v8, v12
 ; NODOT-NEXT:    vsext.vf2 v16, v20
 ; NODOT-NEXT:    vwmacc.vv v24, v8, v16
@@ -645,12 +636,6 @@ define <vscale x 16 x i32> @partial_reduce_m8(<vscale x 64 x i8> %a, <vscale x 6
 ; NODOT-NEXT:    vsext.vf2 v12, v22
 ; NODOT-NEXT:    vwmacc.vv v24, v8, v12
 ; NODOT-NEXT:    vmv8r.v v8, v24
-; NODOT-NEXT:    csrr a0, vlenb
-; NODOT-NEXT:    slli a0, a0, 2
-; NODOT-NEXT:    add sp, sp, a0
-; NODOT-NEXT:    .cfi_def_cfa sp, 16
-; NODOT-NEXT:    addi sp, sp, 16
-; NODOT-NEXT:    .cfi_def_cfa_offset 0
 ; NODOT-NEXT:    ret
 ;
 ; DOT-LABEL: partial_reduce_m8:
@@ -685,6 +670,8 @@ define <vscale x 32 x i32> @partial_reduce_m16(<vscale x 128 x i8> %a, <vscale x
 ; NODOT-NEXT:    add a1, sp, a1
 ; NODOT-NEXT:    addi a1, a1, 16
 ; NODOT-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; NODOT-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; NODOT-NEXT:    vmv8r.v v0, v8
 ; NODOT-NEXT:    addi a1, sp, 16
 ; NODOT-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
 ; NODOT-NEXT:    vl8r.v v16, (a0)
@@ -693,17 +680,16 @@ define <vscale x 32 x i32> @partial_reduce_m16(<vscale x 128 x i8> %a, <vscale x
 ; NODOT-NEXT:    add a1, sp, a1
 ; NODOT-NEXT:    addi a1, a1, 16
 ; NODOT-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; NODOT-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; NODOT-NEXT:    vsext.vf2 v4, v8
-; NODOT-NEXT:    vsext.vf2 v0, v16
-; NODOT-NEXT:    vwmul.vv v24, v4, v0
-; NODOT-NEXT:    vsext.vf2 v4, v10
-; NODOT-NEXT:    vsext.vf2 v8, v18
-; NODOT-NEXT:    vwmacc.vv v24, v4, v8
+; NODOT-NEXT:    vsext.vf2 v8, v0
+; NODOT-NEXT:    vsext.vf2 v12, v16
+; NODOT-NEXT:    vwmul.vv v24, v8, v12
+; NODOT-NEXT:    vsext.vf2 v8, v2
+; NODOT-NEXT:    vsext.vf2 v12, v18
+; NODOT-NEXT:    vwmacc.vv v24, v8, v12
 ; NODOT-NEXT:    csrr a1, vlenb
 ; NODOT-NEXT:    slli a1, a1, 3
 ; NODOT-NEXT:    add a0, a0, a1
-; NODOT-NEXT:    vsext.vf2 v0, v12
+; NODOT-NEXT:    vsext.vf2 v0, v4
 ; NODOT-NEXT:    vl8r.v v8, (a0)
 ; NODOT-NEXT:    csrr a0, vlenb
 ; NODOT-NEXT:    slli a0, a0, 3
@@ -717,17 +703,17 @@ define <vscale x 32 x i32> @partial_reduce_m16(<vscale x 128 x i8> %a, <vscale x
 ; NODOT-NEXT:    add a0, sp, a0
 ; NODOT-NEXT:    addi a0, a0, 16
 ; NODOT-NEXT:    vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; NODOT-NEXT:    vsext.vf2 v20, v0
-; NODOT-NEXT:    vsext.vf2 v16, v8
-; NODOT-NEXT:    vwmul.vv v0, v20, v16
+; NODOT-NEXT:    vsext.vf2 v16, v0
+; NODOT-NEXT:    vsext.vf2 v4, v8
+; NODOT-NEXT:    vwmul.vv v0, v16, v4
 ; NODOT-NEXT:    csrr a0, vlenb
 ; NODOT-NEXT:    slli a0, a0, 4
 ; NODOT-NEXT:    add a0, sp, a0
 ; NODOT-NEXT:    addi a0, a0, 16
 ; NODOT-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; NODOT-NEXT:    vsext.vf2 v20, v18
-; NODOT-NEXT:    vsext.vf2 v16, v10
-; NODOT-NEXT:    vwmacc.vv v0, v20, v16
+; NODOT-NEXT:    vsext.vf2 v16, v18
+; NODOT-NEXT:    vsext.vf2 v8, v10
+; NODOT-NEXT:    vwmacc.vv v0, v16, v8
 ; NODOT-NEXT:    csrr a0, vlenb
 ; NODOT-NEXT:    slli a0, a0, 4
 ; NODOT-NEXT:    add a0, sp, a0
@@ -742,8 +728,8 @@ define <vscale x 32 x i32> @partial_reduce_m16(<vscale x 128 x i8> %a, <vscale x
 ; NODOT-NEXT:    addi a0, a0, 16
 ; NODOT-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
 ; NODOT-NEXT:    vsext.vf2 v8, v22
-; NODOT-NEXT:    vsext.vf2 v16, v14
-; NODOT-NEXT:    vwmacc.vv v0, v8, v16
+; NODOT-NEXT:    vsext.vf2 v12, v14
+; NODOT-NEXT:    vwmacc.vv v0, v8, v12
 ; NODOT-NEXT:    addi a0, sp, 16
 ; NODOT-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
 ; NODOT-NEXT:    vsext.vf2 v8, v14
@@ -836,9 +822,9 @@ define <vscale x 4 x i32> @partial_reduce_accum(<vscale x 16 x i8> %a, <vscale x
 ; NODOT-LABEL: partial_reduce_accum:
 ; NODOT:       # %bb.0: # %entry
 ; NODOT-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; NODOT-NEXT:    vsext.vf2 v24, v8
-; NODOT-NEXT:    vsext.vf2 v28, v10
-; NODOT-NEXT:    vwmul.vv v16, v24, v28
+; NODOT-NEXT:    vsext.vf2 v20, v8
+; NODOT-NEXT:    vsext.vf2 v8, v10
+; NODOT-NEXT:    vwmul.vv v16, v20, v8
 ; NODOT-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; NODOT-NEXT:    vadd.vv v8, v18, v20
 ; NODOT-NEXT:    vadd.vv v10, v12, v16
@@ -864,9 +850,9 @@ define <vscale x 16 x i32> @partial_reduce_via_accum(<vscale x 16 x i8> %a, <vsc
 ; CHECK-LABEL: partial_reduce_via_accum:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vsext.vf2 v16, v8
-; CHECK-NEXT:    vsext.vf2 v20, v10
-; CHECK-NEXT:    vwmul.vv v8, v16, v20
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v16, v10
+; CHECK-NEXT:    vwmul.vv v8, v12, v16
 ; CHECK-NEXT:    ret
 entry:
   %a.sext = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>


