[llvm] b510e4c - [RISCV] Add a vsetvli insert pass that can be extended to be aware of incoming VL/VTYPE from other basic blocks.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon May 24 11:48:12 PDT 2021


Author: Craig Topper
Date: 2021-05-24T11:47:27-07:00
New Revision: b510e4cf1b96ab9c7e6d1c348f31c68f438b98cb

URL: https://github.com/llvm/llvm-project/commit/b510e4cf1b96ab9c7e6d1c348f31c68f438b98cb
DIFF: https://github.com/llvm/llvm-project/commit/b510e4cf1b96ab9c7e6d1c348f31c68f438b98cb.diff

LOG: [RISCV] Add a vsetvli insert pass that can be extended to be aware of incoming VL/VTYPE from other basic blocks.

This is a replacement for D101938 for inserting vsetvli
instructions where needed. This new version changes how
we track the information so that it can be extended to
be aware of VL/VTYPE changes in other blocks. Given how
much this differs from the previous patch, I've decided
to abandon that patch and post this one from scratch.

For now the pass consists of a single phase that assumes
the incoming state from other basic blocks is unknown. A
follow-up patch will extend this with a phase to collect
information about how VL/VTYPE change in each block and
a second phase to propagate this information to the
entire function. A third phase will then use it to do
the vsetvli insertion.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D102737

Added: 
    llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
    llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir

Modified: 
    llvm/lib/Target/RISCV/CMakeLists.txt
    llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
    llvm/lib/Target/RISCV/RISCV.h
    llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
    llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
    llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
    llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll
    llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir
    llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir
    llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
    llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll
    llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir

Removed: 
    llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
    llvm/test/CodeGen/RISCV/rvv/add-vsetvli-gpr.mir
    llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll
    llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir
    llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir


################################################################################
diff  --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index c4f3d3124d29e..961781bec193e 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -21,10 +21,10 @@ add_public_tablegen_target(RISCVCommonTableGen)
 add_llvm_target(RISCVCodeGen
   RISCVAsmPrinter.cpp
   RISCVCallLowering.cpp
-  RISCVCleanupVSETVLI.cpp
   RISCVExpandAtomicPseudoInsts.cpp
   RISCVExpandPseudoInsts.cpp
   RISCVFrameLowering.cpp
+  RISCVInsertVSETVLI.cpp
   RISCVInstrInfo.cpp
   RISCVInstructionSelector.cpp
   RISCVISelDAGToDAG.cpp

diff  --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index d65590e8d25d8..06be20a75c7fa 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -86,7 +86,7 @@ enum VConstraintType {
   VMConstraint = 0b100,
 };
 
-enum VLMUL {
+enum VLMUL : uint8_t {
   LMUL_1 = 0,
   LMUL_2,
   LMUL_4,

diff  --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index ef386fe169203..ef1f97067e120 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -46,8 +46,8 @@ void initializeRISCVExpandPseudoPass(PassRegistry &);
 FunctionPass *createRISCVExpandAtomicPseudoPass();
 void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
 
-FunctionPass *createRISCVCleanupVSETVLIPass();
-void initializeRISCVCleanupVSETVLIPass(PassRegistry &);
+FunctionPass *createRISCVInsertVSETVLIPass();
+void initializeRISCVInsertVSETVLIPass(PassRegistry &);
 
 InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
                                                     RISCVSubtarget &,

diff  --git a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
deleted file mode 100644
index 74f437f0ed840..0000000000000
--- a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-//===- RISCVCleanupVSETVLI.cpp - Cleanup unneeded VSETVLI instructions ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a function pass that removes duplicate vsetvli
-// instructions within a basic block.
-//
-//===----------------------------------------------------------------------===//
-
-#include "RISCV.h"
-#include "RISCVSubtarget.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "riscv-cleanup-vsetvli"
-#define RISCV_CLEANUP_VSETVLI_NAME "RISCV Cleanup VSETVLI pass"
-
-namespace {
-
-class RISCVCleanupVSETVLI : public MachineFunctionPass {
-public:
-  static char ID;
-
-  RISCVCleanupVSETVLI() : MachineFunctionPass(ID) {
-    initializeRISCVCleanupVSETVLIPass(*PassRegistry::getPassRegistry());
-  }
-  bool runOnMachineFunction(MachineFunction &MF) override;
-  bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
-
-  MachineFunctionProperties getRequiredProperties() const override {
-    return MachineFunctionProperties().set(
-        MachineFunctionProperties::Property::IsSSA);
-  }
-
-  // This pass modifies the program, but does not modify the CFG
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  StringRef getPassName() const override { return RISCV_CLEANUP_VSETVLI_NAME; }
-};
-
-} // end anonymous namespace
-
-char RISCVCleanupVSETVLI::ID = 0;
-
-INITIALIZE_PASS(RISCVCleanupVSETVLI, DEBUG_TYPE,
-                RISCV_CLEANUP_VSETVLI_NAME, false, false)
-
-static bool isRedundantVSETVLI(MachineInstr &MI, MachineInstr *PrevVSETVLI) {
-  // If we don't have a previous VSET{I}VLI or the VL output isn't dead, we
-  // can't remove this VSETVLI.
-  if (!PrevVSETVLI || !MI.getOperand(0).isDead())
-    return false;
-
-  // Does this VSET{I}VLI use the same VTYPE immediate.
-  int64_t PrevVTYPEImm = PrevVSETVLI->getOperand(2).getImm();
-  int64_t VTYPEImm = MI.getOperand(2).getImm();
-  if (PrevVTYPEImm != VTYPEImm)
-    return false;
-
-  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
-    // If the previous opcode wasn't vsetivli we can't compare them.
-    if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETIVLI)
-      return false;
-
-    // For VSETIVLI, we can just compare the immediates.
-    return PrevVSETVLI->getOperand(1).getImm() == MI.getOperand(1).getImm();
-  }
-
-  assert(MI.getOpcode() == RISCV::PseudoVSETVLI);
-  Register AVLReg = MI.getOperand(1).getReg();
-  Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
-
-  // If this VSETVLI isn't changing VL, it is redundant.
-  if (AVLReg == RISCV::X0 && MI.getOperand(0).getReg() == RISCV::X0)
-    return true;
-
-  // If the previous VSET{I}VLI's output (which isn't X0) is fed into this
-  // VSETVLI, this one isn't changing VL so is redundant.
-  // Only perform this on virtual registers to avoid the complexity of having
-  // to work out if the physical register was clobbered somewhere in between.
-  if (AVLReg.isVirtual() && AVLReg == PrevOutVL)
-    return true;
-
-  // If the previous opcode isn't vsetvli we can't do any more comparison.
-  if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETVLI)
-    return false;
-
-  // Does this VSETVLI use the same AVL register?
-  if (AVLReg != PrevVSETVLI->getOperand(1).getReg())
-    return false;
-
-  // If the AVLReg is X0 we must be setting VL to VLMAX. Keeping VL unchanged
-  // was handled above.
-  if (AVLReg == RISCV::X0) {
-    // This instruction is setting VL to VLMAX, this is redundant if the
-    // previous VSETVLI was also setting VL to VLMAX. But it is not redundant
-    // if they were setting it to any other value or leaving VL unchanged.
-    return PrevOutVL != RISCV::X0;
-  }
-
-  // This vsetvli is redundant.
-  return true;
-}
-
-bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
-  bool Changed = false;
-  MachineInstr *PrevVSETVLI = nullptr;
-
-  for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) {
-    MachineInstr &MI = *MII++;
-
-    if (MI.getOpcode() != RISCV::PseudoVSETVLI &&
-        MI.getOpcode() != RISCV::PseudoVSETIVLI) {
-      if (PrevVSETVLI &&
-          (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
-           MI.modifiesRegister(RISCV::VTYPE))) {
-        // Old VL/VTYPE is overwritten.
-        PrevVSETVLI = nullptr;
-      }
-      continue;
-    }
-
-    if (isRedundantVSETVLI(MI, PrevVSETVLI)) {
-      // This VSETVLI is redundant, remove it.
-      MI.eraseFromParent();
-      Changed = true;
-    } else {
-      // Otherwise update VSET{I}VLI for the next iteration.
-      PrevVSETVLI = &MI;
-    }
-  }
-
-  return Changed;
-}
-
-bool RISCVCleanupVSETVLI::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(MF.getFunction()))
-    return false;
-
-  // Skip if the vector extension is not enabled.
-  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
-  if (!ST.hasStdExtV())
-    return false;
-
-  bool Changed = false;
-
-  for (MachineBasicBlock &MBB : MF)
-    Changed |= runOnMachineBasicBlock(MBB);
-
-  return Changed;
-}
-
-/// Returns an instance of the Cleanup VSETVLI pass.
-FunctionPass *llvm::createRISCVCleanupVSETVLIPass() {
-  return new RISCVCleanupVSETVLI();
-}

diff  --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 581f26c64abc9..31ef752967cc2 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -240,7 +240,8 @@ bool RISCVExpandPseudo::expandLoadTLSGDAddress(
 
 bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI) {
-  assert(MBBI->getNumOperands() == 5 && "Unexpected instruction format");
+  assert(MBBI->getNumExplicitOperands() == 3 && MBBI->getNumOperands() >= 5 &&
+         "Unexpected instruction format");
 
   DebugLoc DL = MBBI->getDebugLoc();
 

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e786d5e656f77..7493439112b3f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6463,107 +6463,9 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
   return TailMBB;
 }
 
-static MachineInstr *elideCopies(MachineInstr *MI,
-                                 const MachineRegisterInfo &MRI) {
-  while (true) {
-    if (!MI->isFullCopy())
-      return MI;
-    if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
-      return nullptr;
-    MI = MRI.getVRegDef(MI->getOperand(1).getReg());
-    if (!MI)
-      return nullptr;
-  }
-}
-
-static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
-                                    int VLIndex, unsigned SEWIndex,
-                                    RISCVII::VLMUL VLMul,
-                                    bool ForceTailAgnostic) {
-  MachineFunction &MF = *BB->getParent();
-  DebugLoc DL = MI.getDebugLoc();
-  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-
-  unsigned Log2SEW = MI.getOperand(SEWIndex).getImm();
-  unsigned SEW = 1 << Log2SEW;
-  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
-
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-
-  auto BuildVSETVLI = [&]() {
-    if (VLIndex >= 0) {
-      Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
-      const MachineOperand &VLOp = MI.getOperand(VLIndex);
-
-      // VL can be a register or an immediate.
-      if (VLOp.isImm())
-        return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
-            .addReg(DestReg, RegState::Define | RegState::Dead)
-            .addImm(VLOp.getImm());
-
-      Register VLReg = MI.getOperand(VLIndex).getReg();
-      return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
-          .addReg(DestReg, RegState::Define | RegState::Dead)
-          .addReg(VLReg);
-    }
-
-    // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0).
-    return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
-        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
-        .addReg(RISCV::X0, RegState::Kill);
-  };
-
-  MachineInstrBuilder MIB = BuildVSETVLI();
-
-  // Default to tail agnostic unless the destination is tied to a source. In
-  // that case the user would have some control over the tail values. The tail
-  // policy is also ignored on instructions that only update element 0 like
-  // vmv.s.x or reductions so use agnostic there to match the common case.
-  // FIXME: This is conservatively correct, but we might want to detect that
-  // the input is undefined.
-  bool TailAgnostic = true;
-  unsigned UseOpIdx;
-  if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
-    TailAgnostic = false;
-    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
-    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
-    MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
-    if (UseMI) {
-      UseMI = elideCopies(UseMI, MRI);
-      if (UseMI && UseMI->isImplicitDef())
-        TailAgnostic = true;
-    }
-  }
-
-  // For simplicity we reuse the vtype representation here.
-  MIB.addImm(RISCVVType::encodeVTYPE(VLMul, SEW,
-                                     /*TailAgnostic*/ TailAgnostic,
-                                     /*MaskAgnostic*/ false));
-
-  // Remove (now) redundant operands from pseudo
-  if (VLIndex >= 0 && MI.getOperand(VLIndex).isReg()) {
-    MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
-    MI.getOperand(VLIndex).setIsKill(false);
-  }
-
-  return BB;
-}
-
 MachineBasicBlock *
 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
-  uint64_t TSFlags = MI.getDesc().TSFlags;
-
-  if (RISCVII::hasSEWOp(TSFlags)) {
-    unsigned NumOperands = MI.getNumExplicitOperands();
-    int VLIndex = RISCVII::hasVLOp(TSFlags) ? NumOperands - 2 : -1;
-    unsigned SEWIndex = NumOperands - 1;
-    bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
-
-    RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
-    return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
-  }
-
   switch (MI.getOpcode()) {
   default:
     llvm_unreachable("Unexpected instr type to insert");

diff  --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
new file mode 100644
index 0000000000000..c3ddd57d5caa1
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -0,0 +1,396 @@
+//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function pass that inserts VSETVLI instructions where
+// needed.
+//
+// The pass consists of a single pass over each basic block looking for changes
+// in VL/VTYPE usage that requires a vsetvli to be inserted. We assume the
+// VL/VTYPE values are unknown from predecessors so the first vector instruction
+// will always require a new VSETVLI.
+//
+// TODO: Future enhancements to this pass will take into account VL/VTYPE from
+// predecessors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-insert-vsetvli"
+#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"
+
+namespace {
+
+class VSETVLIInfo {
+  union {
+    Register AVLReg;
+    unsigned AVLImm;
+  };
+
+  enum : uint8_t {
+    Uninitialized,
+    AVLIsReg,
+    AVLIsImm,
+    Unknown,
+  } State = Uninitialized;
+
+  // Fields from VTYPE.
+  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
+  uint8_t SEW = 0;
+  bool TailAgnostic = false;
+  bool MaskAgnostic = false;
+
+public:
+  VSETVLIInfo() : AVLImm(0) {}
+
+  bool isValid() const { return State != Uninitialized; }
+  void setUnknown() { State = Unknown; }
+  bool isUnknown() const { return State == Unknown; }
+
+  void setAVLReg(Register Reg) {
+    AVLReg = Reg;
+    State = AVLIsReg;
+  }
+
+  void setAVLImm(unsigned Imm) {
+    AVLImm = Imm;
+    State = AVLIsImm;
+  }
+
+  bool hasAVLImm() const { return State == AVLIsImm; }
+  bool hasAVLReg() const { return State == AVLIsReg; }
+  Register getAVLReg() const {
+    assert(hasAVLReg());
+    return AVLReg;
+  }
+  unsigned getAVLImm() const {
+    assert(hasAVLImm());
+    return AVLImm;
+  }
+
+  bool hasSameAVL(const VSETVLIInfo &Other) const {
+    assert(isValid() && Other.isValid() &&
+           "Can't compare invalid VSETVLIInfos");
+    assert(!isUnknown() && !Other.isUnknown() &&
+           "Can't compare AVL in unknown state");
+    if (hasAVLReg() && Other.hasAVLReg())
+      return getAVLReg() == Other.getAVLReg();
+
+    if (hasAVLImm() && Other.hasAVLImm())
+      return getAVLImm() == Other.getAVLImm();
+
+    return false;
+  }
+
+  void setVTYPE(unsigned VType) {
+    assert(isValid() && !isUnknown() &&
+           "Can't set VTYPE for uninitialized or unknown");
+    VLMul = RISCVVType::getVLMUL(VType);
+    SEW = RISCVVType::getSEW(VType);
+    TailAgnostic = RISCVVType::isTailAgnostic(VType);
+    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
+  }
+  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
+    assert(isValid() && !isUnknown() &&
+           "Can't set VTYPE for uninitialized or unknown");
+    VLMul = L;
+    SEW = S;
+    TailAgnostic = TA;
+    MaskAgnostic = MA;
+  }
+
+  unsigned encodeVTYPE() const {
+    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+  }
+
+  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
+    assert(isValid() && Other.isValid() &&
+           "Can't compare invalid VSETVLIInfos");
+    assert(!isUnknown() && !Other.isUnknown() &&
+           "Can't compare VTYPE in unknown state");
+    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
+           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
+                    Other.MaskAgnostic);
+  }
+
+  bool isCompatible(const VSETVLIInfo &Other) const {
+    assert(isValid() && Other.isValid() &&
+           "Can't compare invalid VSETVLIInfos");
+    // Nothing is compatible with Unknown.
+    if (isUnknown() || Other.isUnknown())
+      return false;
+
+    // If other doesn't need an AVLReg and the SEW matches, consider it
+    // compatible.
+    if (Other.hasAVLReg() && Other.AVLReg == RISCV::NoRegister) {
+      if (SEW == Other.SEW)
+        return true;
+    }
+
+    // VTypes must match.
+    if (!hasSameVTYPE(Other))
+      return false;
+
+    if (hasAVLImm() != Other.hasAVLImm())
+      return false;
+
+    if (hasAVLImm())
+      return getAVLImm() == Other.getAVLImm();
+
+    return getAVLReg() == Other.getAVLReg();
+  }
+};
+
+class RISCVInsertVSETVLI : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+
+public:
+  static char ID;
+
+  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
+    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
+  }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
+
+private:
+  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
+                     const VSETVLIInfo &Info);
+
+  bool emitVSETVLIs(MachineBasicBlock &MBB);
+};
+
+} // end anonymous namespace
+
+char RISCVInsertVSETVLI::ID = 0;
+
+INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
+                false, false)
+
+static MachineInstr *elideCopies(MachineInstr *MI,
+                                 const MachineRegisterInfo *MRI) {
+  while (true) {
+    if (!MI->isFullCopy())
+      return MI;
+    if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
+      return nullptr;
+    MI = MRI->getVRegDef(MI->getOperand(1).getReg());
+    if (!MI)
+      return nullptr;
+  }
+}
+
+static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
+                                       const MachineRegisterInfo *MRI) {
+  VSETVLIInfo InstrInfo;
+  unsigned NumOperands = MI.getNumExplicitOperands();
+
+  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
+
+  unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm();
+  unsigned SEW = 1 << Log2SEW;
+  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+
+  // Default to tail agnostic unless the destination is tied to a source (and
+  // that source is not undef). In that case the user would have some control
+  // over the tail values. The tail policy is also ignored on instructions
+  // that only update element 0, like vmv.s.x or reductions, so use agnostic
+  // there to match the common case.
+  // FIXME: This is conservatively correct, but we might want to detect that
+  // the input is undefined.
+  bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
+  bool TailAgnostic = true;
+  unsigned UseOpIdx;
+  if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
+    TailAgnostic = false;
+    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
+    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
+    if (UseMI) {
+      UseMI = elideCopies(UseMI, MRI);
+      if (UseMI && UseMI->isImplicitDef())
+        TailAgnostic = true;
+    }
+  }
+
+  if (RISCVII::hasVLOp(TSFlags)) {
+    const MachineOperand &VLOp = MI.getOperand(MI.getNumExplicitOperands() - 2);
+    if (VLOp.isImm())
+      InstrInfo.setAVLImm(VLOp.getImm());
+    else
+      InstrInfo.setAVLReg(VLOp.getReg());
+  } else
+    InstrInfo.setAVLReg(RISCV::NoRegister);
+  InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic,
+                     /*MaskAgnostic*/ false);
+
+  return InstrInfo;
+}
+
+void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
+                                       const VSETVLIInfo &Info) {
+  DebugLoc DL = MI.getDebugLoc();
+
+  if (Info.hasAVLImm()) {
+    // TODO: Use X0 as the destination.
+    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI))
+        .addReg(DestReg, RegState::Define | RegState::Dead)
+        .addImm(Info.getAVLImm())
+        .addImm(Info.encodeVTYPE());
+    return;
+  }
+
+  Register AVLReg = Info.getAVLReg();
+  if (AVLReg == RISCV::NoRegister) {
+    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLI))
+        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+        .addReg(RISCV::X0, RegState::Kill)
+        .addImm(Info.encodeVTYPE())
+        .addReg(RISCV::VL, RegState::Implicit);
+    return;
+  }
+
+  Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+  BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLI))
+      .addReg(DestReg, RegState::Define | RegState::Dead)
+      .addReg(Info.getAVLReg())
+      .addImm(Info.encodeVTYPE());
+}
+
+// Return a VSETVLIInfo representing the changes made by this VSETVLI or
+// VSETIVLI instruction.
+VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
+  VSETVLIInfo NewInfo;
+  if (MI.getOpcode() == RISCV::PseudoVSETVLI) {
+    Register AVLReg = MI.getOperand(1).getReg();
+    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
+           "Can't handle X0, X0 vsetvli yet");
+    NewInfo.setAVLReg(AVLReg);
+  } else {
+    assert(MI.getOpcode() == RISCV::PseudoVSETIVLI);
+    NewInfo.setAVLImm(MI.getOperand(1).getImm());
+  }
+  NewInfo.setVTYPE(MI.getOperand(2).getImm());
+
+  return NewInfo;
+}
+
+bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
+  bool MadeChange = false;
+
+  // Assume predecessor state is unknown.
+  VSETVLIInfo CurInfo;
+  CurInfo.setUnknown();
+
+  for (MachineInstr &MI : MBB) {
+    // If this is an explicit VSETVLI or VSETIVLI, update our state.
+    if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
+        MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+      // Conservatively, mark the VL and VTYPE as live.
+      assert(MI.getOperand(3).getReg() == RISCV::VL &&
+             MI.getOperand(4).getReg() == RISCV::VTYPE &&
+             "Unexpected operands where VL and VTYPE should be");
+      MI.getOperand(3).setIsDead(false);
+      MI.getOperand(4).setIsDead(false);
+      MadeChange = true;
+      CurInfo = getInfoForVSETVLI(MI);
+      continue;
+    }
+
+    uint64_t TSFlags = MI.getDesc().TSFlags;
+    if (RISCVII::hasSEWOp(TSFlags)) {
+      VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
+      if (RISCVII::hasVLOp(TSFlags)) {
+        MachineOperand &VLOp = MI.getOperand(MI.getNumExplicitOperands() - 2);
+        if (VLOp.isReg()) {
+          // Erase the AVL operand from the instruction.
+          VLOp.setReg(RISCV::NoRegister);
+          VLOp.setIsKill(false);
+        }
+        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
+                                                /*isImp*/ true));
+      }
+      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
+                                              /*isImp*/ true));
+
+      bool NeedVSETVLI = true;
+      if (CurInfo.isValid() && CurInfo.isCompatible(NewInfo))
+        NeedVSETVLI = false;
+
+      // We didn't find a compatible value. If our AVL is a virtual register,
+      // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
+      // and the last VL/VTYPE we observed is the same, we don't need a
+      // VSETVLI here.
+      if (NeedVSETVLI && !CurInfo.isUnknown() && NewInfo.hasAVLReg() &&
+          NewInfo.getAVLReg().isVirtual() && NewInfo.hasSameVTYPE(CurInfo)) {
+        if (MachineInstr *DefMI = MRI->getVRegDef(NewInfo.getAVLReg())) {
+          if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
+              DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
+            VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
+            if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
+              NeedVSETVLI = false;
+          }
+        }
+      }
+
+      // If this instruction isn't compatible with the previous VL/VTYPE
+      // we need to insert a VSETVLI.
+      if (NeedVSETVLI) {
+        insertVSETVLI(MBB, MI, NewInfo);
+        CurInfo = NewInfo;
+      }
+
+      // If we find an instruction we at least changed the operands.
+      MadeChange = true;
+    }
+    // If this is something that updates VL/VTYPE in a way we don't know
+    // about, set the state to unknown.
+    if (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
+        MI.modifiesRegister(RISCV::VTYPE)) {
+      VSETVLIInfo NewInfo;
+      NewInfo.setUnknown();
+      CurInfo = NewInfo;
+    }
+  }
+
+  return MadeChange;
+}
+
+bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
+  // Skip if the vector extension is not enabled.
+  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+  if (!ST.hasStdExtV())
+    return false;
+
+  TII = ST.getInstrInfo();
+  MRI = &MF.getRegInfo();
+
+  bool Changed = false;
+
+  for (MachineBasicBlock &MBB : MF)
+    Changed |= emitVSETVLIs(MBB);
+
+  return Changed;
+}
+
+/// Returns an instance of the Insert VSETVLI pass.
+FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
+  return new RISCVInsertVSETVLI();
+}

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index e96a7f5517491..c79e444d559ea 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -619,8 +619,6 @@ class VPseudoUSLoadNoMask<VReg RetClass, bits<7> EEW, bit isFF> :
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -637,9 +635,7 @@ class VPseudoUSLoadMask<VReg RetClass, bits<7> EEW, bit isFF> :
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = "$rd = $merge";
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 1;
@@ -654,8 +650,6 @@ class VPseudoSLoadNoMask<VReg RetClass, bits<7> EEW>:
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -672,9 +666,7 @@ class VPseudoSLoadMask<VReg RetClass, bits<7> EEW>:
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = "$rd = $merge";
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 1;
@@ -690,8 +682,6 @@ class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -710,9 +700,7 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $merge", "$rd = $merge");
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 1;
@@ -727,8 +715,6 @@ class VPseudoUSStoreNoMask<VReg StClass, bits<7> EEW>:
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -743,8 +729,6 @@ class VPseudoUSStoreMask<VReg StClass, bits<7> EEW>:
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@@ -758,8 +742,6 @@ class VPseudoSStoreNoMask<VReg StClass, bits<7> EEW>:
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -774,8 +756,6 @@ class VPseudoSStoreMask<VReg StClass, bits<7> EEW>:
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@@ -790,8 +770,6 @@ class VPseudoUnaryNoDummyMask<VReg RetClass,
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@@ -804,8 +782,6 @@ class VPseudoNullaryNoMask<VReg RegClass>:
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -819,9 +795,7 @@ class VPseudoNullaryMask<VReg RegClass>:
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints ="$rd = $merge";
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 1;
@@ -836,8 +810,6 @@ class VPseudoNullaryPseudoM<string BaseInst>
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   // BaseInstr is not used in RISCVExpandPseudoInsts pass.
@@ -853,9 +825,7 @@ class VPseudoUnaryNoMask<DAGOperand RetClass, VReg OpClass, string Constraint =
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = Constraint;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -870,9 +840,7 @@ class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 1;
@@ -887,8 +855,6 @@ class VPseudoMaskUnarySOutMask:
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@@ -906,9 +872,7 @@ class VPseudoUnaryAnyMask<VReg RetClass,
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = "@earlyclobber $rd, $rd = $merge";
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 1;
@@ -925,9 +889,7 @@ class VPseudoBinaryNoMask<VReg RetClass,
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = Constraint;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -943,8 +905,6 @@ class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -960,8 +920,6 @@ class VPseudoIStoreMask<VReg StClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@@ -979,9 +937,7 @@ class VPseudoBinaryMask<VReg RetClass,
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 1;
@@ -1001,9 +957,7 @@ class VPseudoBinaryMOutMask<VReg RetClass,
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 1;
@@ -1025,9 +979,7 @@ class VPseudoBinaryCarryIn<VReg RetClass,
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = Constraint;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 0;
@@ -1047,9 +999,7 @@ class VPseudoTernaryNoMask<VReg RetClass,
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = Join<[Constraint, "$rd = $rs3"], ",">.ret;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 1;
@@ -1068,9 +1018,7 @@ class VPseudoAMOWDNoMask<VReg RetClass,
   let mayLoad = 1;
   let mayStore = 1;
   let hasSideEffects = 1;
-  let usesCustomInserter = 1;
   let Constraints = "$vd_wd = $vd";
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -1088,9 +1036,7 @@ class VPseudoAMOWDMask<VReg RetClass,
   let mayLoad = 1;
   let mayStore = 1;
   let hasSideEffects = 1;
-  let usesCustomInserter = 1;
   let Constraints = "$vd_wd = $vd";
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@@ -1131,8 +1077,6 @@ class VPseudoUSSegLoadNoMask<VReg RetClass, bits<7> EEW, bits<4> NF, bit isFF>:
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -1148,9 +1092,7 @@ class VPseudoUSSegLoadMask<VReg RetClass, bits<7> EEW, bits<4> NF, bit isFF>:
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = "$rd = $merge";
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 1;
@@ -1166,8 +1108,6 @@ class VPseudoSSegLoadNoMask<VReg RetClass, bits<7> EEW, bits<4> NF>:
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -1183,9 +1123,7 @@ class VPseudoSSegLoadMask<VReg RetClass, bits<7> EEW, bits<4> NF>:
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   let Constraints = "$rd = $merge";
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 1;
@@ -1201,11 +1139,9 @@ class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> L
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   // For vector indexed segment loads, the destination vector register groups
   // cannot overlap the source vector register group
   let Constraints = "@earlyclobber $rd";
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -1222,11 +1158,9 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMU
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
   // For vector indexed segment loads, the destination vector register groups
   // cannot overlap the source vector register group
   let Constraints = "@earlyclobber $rd, $rd = $merge";
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasMergeOp = 1;
@@ -1241,8 +1175,6 @@ class VPseudoUSSegStoreNoMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -1258,8 +1190,6 @@ class VPseudoUSSegStoreMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@@ -1273,8 +1203,6 @@ class VPseudoSSegStoreNoMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -1290,8 +1218,6 @@ class VPseudoSSegStoreMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@@ -1307,8 +1233,6 @@ class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, bits<7> EEW, bits<3>
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
@@ -1325,8 +1249,6 @@ class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, bits<7> EEW, bits<3> LM
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
-  let usesCustomInserter = 1;
-  let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@@ -3521,7 +3443,7 @@ let Defs = [VXSAT], hasSideEffects = 1 in {
 //===----------------------------------------------------------------------===//
 // 13.2. Vector Single-Width Averaging Add and Subtract
 //===----------------------------------------------------------------------===//
-let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in {
+let Uses = [VXRM], hasSideEffects = 1 in {
   defm PseudoVAADDU      : VPseudoBinaryV_VV_VX;
   defm PseudoVAADD       : VPseudoBinaryV_VV_VX;
   defm PseudoVASUBU      : VPseudoBinaryV_VV_VX;
@@ -3531,14 +3453,14 @@ let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in {
 //===----------------------------------------------------------------------===//
 // 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
 //===----------------------------------------------------------------------===//
-let Uses = [VL, VTYPE, VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
+let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
   defm PseudoVSMUL      : VPseudoBinaryV_VV_VX;
 }
 
 //===----------------------------------------------------------------------===//
 // 13.4. Vector Single-Width Scaling Shift Instructions
 //===----------------------------------------------------------------------===//
-let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in {
+let Uses = [VXRM], hasSideEffects = 1 in {
   defm PseudoVSSRL        : VPseudoBinaryV_VV_VX_VI<uimm5>;
   defm PseudoVSSRA        : VPseudoBinaryV_VV_VX_VI<uimm5>;
 }
@@ -3546,7 +3468,7 @@ let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in {
 //===----------------------------------------------------------------------===//
 // 13.5. Vector Narrowing Fixed-Point Clip Instructions
 //===----------------------------------------------------------------------===//
-let Uses = [VL, VTYPE, VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
+let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
   defm PseudoVNCLIP     : VPseudoBinaryV_WV_WX_WI;
   defm PseudoVNCLIPU    : VPseudoBinaryV_WV_WX_WI;
 }
@@ -3792,8 +3714,7 @@ defm PseudoVID : VPseudoMaskNullaryV;
 //===----------------------------------------------------------------------===//
 
 let Predicates = [HasStdExtV] in {
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1,
-    Uses = [VL, VTYPE] in {
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
   foreach m = MxList.m in {
     let VLMul = m.value in {
       let HasSEWOp = 1, BaseInstr = VMV_X_S in
@@ -3816,8 +3737,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1,
 //===----------------------------------------------------------------------===//
 
 let Predicates = [HasStdExtV, HasStdExtF] in {
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1,
-    Uses = [VL, VTYPE] in {
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
   foreach m = MxList.m in {
     foreach f = FPList.fpinfo in {
       let VLMul = m.value in {

diff  --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 9fe8b51fbab04..b18ee60092173 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -39,7 +39,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeGlobalISel(*PR);
   initializeRISCVMergeBaseOffsetOptPass(*PR);
   initializeRISCVExpandPseudoPass(*PR);
-  initializeRISCVCleanupVSETVLIPass(*PR);
+  initializeRISCVInsertVSETVLIPass(*PR);
 }
 
 static StringRef computeDataLayout(const Triple &TT) {
@@ -191,8 +191,7 @@ void RISCVPassConfig::addPreEmitPass2() {
 }
 
 void RISCVPassConfig::addPreRegAlloc() {
-  if (TM->getOptLevel() != CodeGenOpt::None) {
+  if (TM->getOptLevel() != CodeGenOpt::None)
     addPass(createRISCVMergeBaseOffsetOptPass());
-    addPass(createRISCVCleanupVSETVLIPass());
-  }
+  addPass(createRISCVInsertVSETVLIPass());
 }

diff  --git a/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-gpr.mir b/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-gpr.mir
deleted file mode 100644
index 21eca8496fa95..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-gpr.mir
+++ /dev/null
@@ -1,56 +0,0 @@
-# RUN: llc -mtriple riscv64 -mattr=+experimental-v %s  \
-# RUN:     -start-before=finalize-isel -stop-after=finalize-isel -o - \
-# RUN:     | FileCheck --check-prefix=POST-INSERTER %s
-
-# RUN: llc -mtriple riscv64 -mattr=+experimental-v %s  \
-# RUN:     -start-before=finalize-isel -o - \
-# RUN:     | FileCheck --check-prefix=CODEGEN %s
-
---- |
-  define void @vadd_vint64m1(
-            <vscale x 1 x i64> *%pc,
-            <vscale x 1 x i64> *%pa,
-            <vscale x 1 x i64> *%pb,
-            i64 %vl)
-  {
-    ret void
-  }
-...
----
-name: vadd_vint64m1
-tracksRegLiveness: true
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: $x10, $x11, $x12, $x13
-
-    %3:gpr = COPY $x13
-    %2:gpr = COPY $x12
-    %1:gpr = COPY $x11
-    %0:gpr = COPY $x10
-    %4:vr = PseudoVLE64_V_M1 %1, %3, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
-    %5:vr = PseudoVLE64_V_M1 %2, %3, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pb, align 8)
-    %6:vr = PseudoVADD_VV_M1 killed %4, killed %5, %3, 6, implicit $vl, implicit $vtype
-    PseudoVSE64_V_M1 killed %6, %0, %3, 6, implicit $vl, implicit $vtype :: (store unknown-size into %ir.pc, align 8)
-    PseudoRET
-
-...
-
-# POST-INSERTER: %0:gpr = COPY $x13
-# POST-INSERTER: %1:gpr = COPY $x12
-# POST-INSERTER: %2:gpr = COPY $x11
-# POST-INSERTER: %3:gpr = COPY $x10
-# POST-INSERTER: dead %7:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype
-# POST-INSERTER: %4:vr = PseudoVLE64_V_M1 %2, $noreg, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
-# POST-INSERTER: dead %8:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype
-# POST-INSERTER: %5:vr = PseudoVLE64_V_M1 %1, $noreg, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pb, align 8)
-# POST-INSERTER: dead %9:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype
-# POST-INSERTER: %6:vr = PseudoVADD_VV_M1 killed %4, killed %5, $noreg, 6, implicit $vl, implicit $vtype
-# POST-INSERTER: dead %10:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype
-# POST-INSERTER: PseudoVSE64_V_M1 killed %6, %3, $noreg, 6, implicit $vl, implicit $vtype :: (store unknown-size into %ir.pc, align 8)
-
-# CODEGEN: vsetvli	a3, a3, e64,m1,ta,mu
-# CODEGEN-NEXT: vle64.v	v25, (a1)
-# CODEGEN-NEXT: vle64.v	v26, (a2)
-# CODEGEN-NEXT: vadd.vv	v25, v25, v26
-# CODEGEN-NEXT: vse64.v	v25, (a0)
-# CODEGEN-NEXT: ret

diff  --git a/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll b/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll
deleted file mode 100644
index b9482343a4206..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; This test shows the evolution of RVV pseudo instructions within isel.
-
-; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o %t.pre.mir \
-; RUN:     -stop-before=finalize-isel
-; RUN: cat %t.pre.mir | FileCheck --check-prefix=PRE-INSERTER %s
-
-; RUN: llc -mtriple riscv64 -mattr=+experimental-v %t.pre.mir -o %t.post.mir \
-; RUN:     -start-before=finalize-isel -stop-after=finalize-isel
-; RUN: cat %t.post.mir | FileCheck --check-prefix=POST-INSERTER %s
-
-define void @vadd_vint64m1(
-          <vscale x 1 x i64> *%pc,
-          <vscale x 1 x i64> *%pa,
-          <vscale x 1 x i64> *%pb)
-{
-  %va = load <vscale x 1 x i64>, <vscale x 1 x i64>* %pa
-  %vb = load <vscale x 1 x i64>, <vscale x 1 x i64>* %pb
-  %vc = add <vscale x 1 x i64> %va, %vb
-  store <vscale x 1 x i64> %vc, <vscale x 1 x i64> *%pc
-  ret void
-}
-
-; PRE-INSERTER: %3:vr = VL1RE64_V %1 :: (load unknown-size from %ir.pa, align 8)
-; PRE-INSERTER: %4:vr = VL1RE64_V %2 :: (load unknown-size from %ir.pb, align 8)
-; PRE-INSERTER: %5:vr = PseudoVADD_VV_M1 killed %3, killed %4, $x0, 6, implicit $vl, implicit $vtype
-; PRE-INSERTER:  VS1R_V killed %5, %0 :: (store unknown-size into %ir.pc, align 8)
-
-; POST-INSERTER: %3:vr = VL1RE64_V %1 :: (load unknown-size from %ir.pa, align 8)
-; POST-INSERTER: %4:vr = VL1RE64_V %2 :: (load unknown-size from %ir.pb, align 8)
-; POST-INSERTER: dead %6:gpr = PseudoVSETVLI $x0, 88, implicit-def $vl, implicit-def $vtype
-; POST-INSERTER: %5:vr = PseudoVADD_VV_M1 killed %3, killed %4, $noreg, 6, implicit $vl, implicit $vtype
-; POST-INSERTER: VS1R_V killed %5, %0 :: (store unknown-size into %ir.pc, align 8)

diff  --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
index e560a709f935d..30f96aa6e782c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=riscv64 -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
+# RUN: llc -march=riscv64 -mattr=+experimental-v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
 
 --- |
   define void @add_scalable_offset(
@@ -55,7 +55,7 @@ body: |
     ; CHECK: PseudoRET
     %1:gpr = COPY $x11
     %0:gpr = COPY $x10
-    %2:vr = PseudoVLE64_V_M1 %0, %1, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
+    %2:vr = PseudoVLE64_V_M1 %0, %1, 6 :: (load unknown-size from %ir.pa, align 8)
     %3:gpr = ADDI %stack.2, 0
     VS1R_V killed %2:vr, %3:gpr
     PseudoRET

diff  --git a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir b/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir
deleted file mode 100644
index ed8bc56980625..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir
+++ /dev/null
@@ -1,46 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s
-
-# Make sure we don't combine these VSET{I}VLIs in the cleanup pass. We could not
-# differentiate AVL values if the opcode of the previous one is different from
-# current one.
-
---- |
-  ; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetivli.ll'
-  source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetivli.ll"
-  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
-  target triple = "riscv64"
-
-  define void @cleanup_vsetivli() #0 {
-    ret void
-  }
-
-  attributes #0 = { "target-features"="+experimental-v" }
-
-...
----
-name:            cleanup_vsetivli
-alignment:       4
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr }
-frameInfo:
-  maxAlignment:    1
-machineFunctionInfo: {}
-body:             |
-  bb.0 (%ir-block.0):
-    ; CHECK-LABEL: name: cleanup_vsetivli
-    ; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    ; CHECK: dead %1:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
-    ; CHECK: dead %3:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    ; CHECK: dead %5:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
-    ; CHECK: PseudoRET
-    dead %0:gpr  = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    dead %1:gpr  = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
-    dead %2:gpr  = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
-    dead %3:gpr  = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    dead %4:gpr  = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    dead %5:gpr  = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
-    PseudoRET
-
-...

diff  --git a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir b/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir
deleted file mode 100644
index d22f63f844cc7..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir
+++ /dev/null
@@ -1,79 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s
-
---- |
-  ; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll'
-  source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll"
-  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
-  target triple = "riscv64"
-
-  define void @cleanup_vsetvli0() #0 {
-    ret void
-  }
-
-  define void @cleanup_vsetvli1() #0 {
-    ret void
-  }
-
-  attributes #0 = { "target-features"="+experimental-v" }
-
-...
----
-# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first
-# keeps the previous value of VL, the second sets it to VLMAX. We can't remove
-# the first since we can't tell if this is a change of VL.
-name:            cleanup_vsetvli0
-alignment:       4
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr }
-frameInfo:
-  maxAlignment:    1
-machineFunctionInfo: {}
-body:             |
-  bb.0 (%ir-block.0):
-    ; CHECK-LABEL: name: cleanup_vsetvli0
-    ; CHECK: dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    ; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    ; CHECK: PseudoRET
-    dead $x0  = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    dead %0:gpr  = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    PseudoRET
-
-...
----
-# 1. Ensure we can remove the second VSETVLI which takes its AVL from the first VSETVLI.
-# 2. Ensure we can remove the fourth VSETVLI which takes its AVL from the VSETIVLI.
-# 3. Make sure we don't combine the latter two VSETVLIs; the first outputs to a
-# physical register which is clobbered by a later instruction.
-name:            cleanup_vsetvli1
-alignment:       4
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr }
-frameInfo:
-  maxAlignment:    1
-machineFunctionInfo: {}
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: $x3
-    ; CHECK-LABEL: name: cleanup_vsetvli1
-    ; CHECK: liveins: $x3
-    ; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    ; CHECK: [[PseudoVSETIVLI:%[0-9]+]]:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype
-    ; CHECK: $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    ; CHECK: $x1 = COPY $x3
-    ; CHECK: dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype
-    ; CHECK: PseudoRET
-    %0:gpr  = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    dead %1:gpr  = PseudoVSETVLI %0, 12, implicit-def $vl, implicit-def $vtype
-
-    %2:gpr  = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype
-    dead %3:gpr  = PseudoVSETVLI %2, 12, implicit-def $vl, implicit-def $vtype
-
-    $x1  = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
-    $x1 = COPY $x3
-    dead %4:gpr  = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype
-    PseudoRET
-
-...

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index 7c6c22fad9b97..89efeb392c4ae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -3667,11 +3667,12 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    addi a3, a1, 819
 ; LMULMAX2-RV32-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32-NEXT:    addi a7, a1, -241
-; LMULMAX2-RV32-NEXT:    lui a1, 4112
-; LMULMAX2-RV32-NEXT:    addi a2, a1, 257
-; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v25
+; LMULMAX2-RV32-NEXT:    lui a2, 4112
+; LMULMAX2-RV32-NEXT:    addi a2, a2, 257
 ; LMULMAX2-RV32-NEXT:    bnez a5, .LBB3_2
 ; LMULMAX2-RV32-NEXT:  # %bb.1:
+; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v25
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
 ; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 2
@@ -3726,12 +3727,13 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
 ; LMULMAX2-RV32-NEXT:    vslidedown.vi v25, v25, 1
 ; LMULMAX2-RV32-NEXT:    vsrl.vx v26, v25, a6
-; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v26
-; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v25
-; LMULMAX2-RV32-NEXT:    bnez a1, .LBB3_5
+; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v26
+; LMULMAX2-RV32-NEXT:    bnez a5, .LBB3_5
 ; LMULMAX2-RV32-NEXT:  # %bb.4:
-; LMULMAX2-RV32-NEXT:    srli a1, a5, 1
-; LMULMAX2-RV32-NEXT:    or a1, a5, a1
+; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v25
+; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
+; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 2
 ; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 4
@@ -3756,8 +3758,8 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 32
 ; LMULMAX2-RV32-NEXT:    j .LBB3_6
 ; LMULMAX2-RV32-NEXT:  .LBB3_5:
-; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
-; LMULMAX2-RV32-NEXT:    or a1, a1, a5
+; LMULMAX2-RV32-NEXT:    srli a1, a5, 1
+; LMULMAX2-RV32-NEXT:    or a1, a5, a1
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 2
 ; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 4
@@ -3900,11 +3902,12 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    addi a3, a1, 819
 ; LMULMAX1-RV32-NEXT:    lui a1, 61681
 ; LMULMAX1-RV32-NEXT:    addi a7, a1, -241
-; LMULMAX1-RV32-NEXT:    lui a1, 4112
-; LMULMAX1-RV32-NEXT:    addi a2, a1, 257
-; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v25
+; LMULMAX1-RV32-NEXT:    lui a2, 4112
+; LMULMAX1-RV32-NEXT:    addi a2, a2, 257
 ; LMULMAX1-RV32-NEXT:    bnez a5, .LBB3_2
 ; LMULMAX1-RV32-NEXT:  # %bb.1:
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v25
 ; LMULMAX1-RV32-NEXT:    srli a5, a1, 1
 ; LMULMAX1-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX1-RV32-NEXT:    srli a5, a1, 2
@@ -3959,12 +3962,13 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
 ; LMULMAX1-RV32-NEXT:    vslidedown.vi v25, v25, 1
 ; LMULMAX1-RV32-NEXT:    vsrl.vx v26, v25, a6
-; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v26
-; LMULMAX1-RV32-NEXT:    vmv.x.s a5, v25
-; LMULMAX1-RV32-NEXT:    bnez a1, .LBB3_5
+; LMULMAX1-RV32-NEXT:    vmv.x.s a5, v26
+; LMULMAX1-RV32-NEXT:    bnez a5, .LBB3_5
 ; LMULMAX1-RV32-NEXT:  # %bb.4:
-; LMULMAX1-RV32-NEXT:    srli a1, a5, 1
-; LMULMAX1-RV32-NEXT:    or a1, a5, a1
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v25
+; LMULMAX1-RV32-NEXT:    srli a5, a1, 1
+; LMULMAX1-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX1-RV32-NEXT:    srli a5, a1, 2
 ; LMULMAX1-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX1-RV32-NEXT:    srli a5, a1, 4
@@ -3989,8 +3993,8 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, 32
 ; LMULMAX1-RV32-NEXT:    j .LBB3_6
 ; LMULMAX1-RV32-NEXT:  .LBB3_5:
-; LMULMAX1-RV32-NEXT:    srli a5, a1, 1
-; LMULMAX1-RV32-NEXT:    or a1, a1, a5
+; LMULMAX1-RV32-NEXT:    srli a1, a5, 1
+; LMULMAX1-RV32-NEXT:    or a1, a5, a1
 ; LMULMAX1-RV32-NEXT:    srli a5, a1, 2
 ; LMULMAX1-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX1-RV32-NEXT:    srli a5, a1, 4
@@ -11120,11 +11124,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    addi a3, a1, 819
 ; LMULMAX2-RV32-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32-NEXT:    addi a7, a1, -241
-; LMULMAX2-RV32-NEXT:    lui a1, 4112
-; LMULMAX2-RV32-NEXT:    addi a2, a1, 257
-; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v26
+; LMULMAX2-RV32-NEXT:    lui a2, 4112
+; LMULMAX2-RV32-NEXT:    addi a2, a2, 257
 ; LMULMAX2-RV32-NEXT:    bnez a5, .LBB7_2
 ; LMULMAX2-RV32-NEXT:  # %bb.1:
+; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v26
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
 ; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 2
@@ -11179,12 +11184,13 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; LMULMAX2-RV32-NEXT:    vslidedown.vi v28, v26, 3
 ; LMULMAX2-RV32-NEXT:    vsrl.vx v30, v28, a6
-; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v30
-; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v28
-; LMULMAX2-RV32-NEXT:    bnez a1, .LBB7_5
+; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v30
+; LMULMAX2-RV32-NEXT:    bnez a5, .LBB7_5
 ; LMULMAX2-RV32-NEXT:  # %bb.4:
-; LMULMAX2-RV32-NEXT:    srli a1, a5, 1
-; LMULMAX2-RV32-NEXT:    or a1, a5, a1
+; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v28
+; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
+; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 2
 ; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 4
@@ -11209,8 +11215,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    addi a5, a1, 32
 ; LMULMAX2-RV32-NEXT:    j .LBB7_6
 ; LMULMAX2-RV32-NEXT:  .LBB7_5:
-; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
-; LMULMAX2-RV32-NEXT:    or a1, a1, a5
+; LMULMAX2-RV32-NEXT:    srli a1, a5, 1
+; LMULMAX2-RV32-NEXT:    or a1, a5, a1
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 2
 ; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 4
@@ -11237,12 +11243,13 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; LMULMAX2-RV32-NEXT:    vslidedown.vi v28, v26, 2
 ; LMULMAX2-RV32-NEXT:    vsrl.vx v30, v28, a6
-; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v30
-; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v28
-; LMULMAX2-RV32-NEXT:    bnez a1, .LBB7_8
+; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v30
+; LMULMAX2-RV32-NEXT:    bnez a5, .LBB7_8
 ; LMULMAX2-RV32-NEXT:  # %bb.7:
-; LMULMAX2-RV32-NEXT:    srli a1, a5, 1
-; LMULMAX2-RV32-NEXT:    or a1, a5, a1
+; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v28
+; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
+; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 2
 ; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 4
@@ -11267,8 +11274,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    addi a5, a1, 32
 ; LMULMAX2-RV32-NEXT:    j .LBB7_9
 ; LMULMAX2-RV32-NEXT:  .LBB7_8:
-; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
-; LMULMAX2-RV32-NEXT:    or a1, a1, a5
+; LMULMAX2-RV32-NEXT:    srli a1, a5, 1
+; LMULMAX2-RV32-NEXT:    or a1, a5, a1
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 2
 ; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 4
@@ -11295,12 +11302,13 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; LMULMAX2-RV32-NEXT:    vslidedown.vi v26, v26, 1
 ; LMULMAX2-RV32-NEXT:    vsrl.vx v28, v26, a6
-; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v28
-; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v26
-; LMULMAX2-RV32-NEXT:    bnez a1, .LBB7_11
+; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v28
+; LMULMAX2-RV32-NEXT:    bnez a5, .LBB7_11
 ; LMULMAX2-RV32-NEXT:  # %bb.10:
-; LMULMAX2-RV32-NEXT:    srli a1, a5, 1
-; LMULMAX2-RV32-NEXT:    or a1, a5, a1
+; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v26
+; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
+; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 2
 ; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 4
@@ -11325,8 +11333,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 32
 ; LMULMAX2-RV32-NEXT:    j .LBB7_12
 ; LMULMAX2-RV32-NEXT:  .LBB7_11:
-; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
-; LMULMAX2-RV32-NEXT:    or a1, a1, a5
+; LMULMAX2-RV32-NEXT:    srli a1, a5, 1
+; LMULMAX2-RV32-NEXT:    or a1, a5, a1
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 2
 ; LMULMAX2-RV32-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 4
@@ -11544,13 +11552,14 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    addi a4, a2, 819
 ; LMULMAX1-RV32-NEXT:    lui a2, 61681
 ; LMULMAX1-RV32-NEXT:    addi t0, a2, -241
-; LMULMAX1-RV32-NEXT:    lui a2, 4112
-; LMULMAX1-RV32-NEXT:    addi a3, a2, 257
-; LMULMAX1-RV32-NEXT:    vmv.x.s a2, v26
+; LMULMAX1-RV32-NEXT:    lui a3, 4112
+; LMULMAX1-RV32-NEXT:    addi a3, a3, 257
 ; LMULMAX1-RV32-NEXT:    bnez a1, .LBB7_2
 ; LMULMAX1-RV32-NEXT:  # %bb.1:
-; LMULMAX1-RV32-NEXT:    srli a1, a2, 1
-; LMULMAX1-RV32-NEXT:    or a1, a2, a1
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v26
+; LMULMAX1-RV32-NEXT:    srli a2, a1, 1
+; LMULMAX1-RV32-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV32-NEXT:    srli a2, a1, 2
 ; LMULMAX1-RV32-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV32-NEXT:    srli a2, a1, 4
@@ -11604,11 +11613,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vslidedown.vi v26, v26, 1
 ; LMULMAX1-RV32-NEXT:    vsrl.vx v27, v26, a7
 ; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v27
-; LMULMAX1-RV32-NEXT:    vmv.x.s a2, v26
 ; LMULMAX1-RV32-NEXT:    bnez a1, .LBB7_5
 ; LMULMAX1-RV32-NEXT:  # %bb.4:
-; LMULMAX1-RV32-NEXT:    srli a1, a2, 1
-; LMULMAX1-RV32-NEXT:    or a1, a2, a1
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v26
+; LMULMAX1-RV32-NEXT:    srli a2, a1, 1
+; LMULMAX1-RV32-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV32-NEXT:    srli a2, a1, 2
 ; LMULMAX1-RV32-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV32-NEXT:    srli a2, a1, 4
@@ -11663,11 +11673,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
 ; LMULMAX1-RV32-NEXT:    vsrl.vx v26, v25, a7
 ; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v26
-; LMULMAX1-RV32-NEXT:    vmv.x.s a2, v25
 ; LMULMAX1-RV32-NEXT:    bnez a1, .LBB7_8
 ; LMULMAX1-RV32-NEXT:  # %bb.7:
-; LMULMAX1-RV32-NEXT:    srli a1, a2, 1
-; LMULMAX1-RV32-NEXT:    or a1, a2, a1
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v25
+; LMULMAX1-RV32-NEXT:    srli a2, a1, 1
+; LMULMAX1-RV32-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV32-NEXT:    srli a2, a1, 2
 ; LMULMAX1-RV32-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV32-NEXT:    srli a2, a1, 4
@@ -11721,11 +11732,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vslidedown.vi v25, v25, 1
 ; LMULMAX1-RV32-NEXT:    vsrl.vx v26, v25, a7
 ; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v26
-; LMULMAX1-RV32-NEXT:    vmv.x.s a2, v25
 ; LMULMAX1-RV32-NEXT:    bnez a1, .LBB7_11
 ; LMULMAX1-RV32-NEXT:  # %bb.10:
-; LMULMAX1-RV32-NEXT:    srli a1, a2, 1
-; LMULMAX1-RV32-NEXT:    or a1, a2, a1
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v25
+; LMULMAX1-RV32-NEXT:    srli a2, a1, 1
+; LMULMAX1-RV32-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV32-NEXT:    srli a2, a1, 2
 ; LMULMAX1-RV32-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV32-NEXT:    srli a2, a1, 4

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 2674d2bdeb3ae..85ad0aee92db9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -2538,9 +2538,6 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    sw zero, 12(sp)
 ; LMULMAX2-RV32-NEXT:    sw zero, 4(sp)
 ; LMULMAX2-RV32-NEXT:    addi a6, zero, 32
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vx v26, v25, a6
-; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v26
 ; LMULMAX2-RV32-NEXT:    lui a1, 349525
 ; LMULMAX2-RV32-NEXT:    addi a4, a1, 1365
 ; LMULMAX2-RV32-NEXT:    lui a1, 209715
@@ -2548,13 +2545,16 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32-NEXT:    addi a7, a1, -241
 ; LMULMAX2-RV32-NEXT:    lui a2, 4112
-; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v25
+; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v25
 ; LMULMAX2-RV32-NEXT:    addi a2, a2, 257
-; LMULMAX2-RV32-NEXT:    bnez a1, .LBB3_2
+; LMULMAX2-RV32-NEXT:    bnez a5, .LBB3_2
 ; LMULMAX2-RV32-NEXT:  # %bb.1:
-; LMULMAX2-RV32-NEXT:    addi a1, a5, -1
-; LMULMAX2-RV32-NEXT:    not a5, a5
-; LMULMAX2-RV32-NEXT:    and a1, a5, a1
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vsrl.vx v26, v25, a6
+; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v26
+; LMULMAX2-RV32-NEXT:    addi a5, a1, -1
+; LMULMAX2-RV32-NEXT:    not a1, a1
+; LMULMAX2-RV32-NEXT:    and a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
 ; LMULMAX2-RV32-NEXT:    and a5, a5, a4
 ; LMULMAX2-RV32-NEXT:    sub a1, a1, a5
@@ -2570,9 +2570,9 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    addi a5, a1, 32
 ; LMULMAX2-RV32-NEXT:    j .LBB3_3
 ; LMULMAX2-RV32-NEXT:  .LBB3_2:
-; LMULMAX2-RV32-NEXT:    addi a5, a1, -1
-; LMULMAX2-RV32-NEXT:    not a1, a1
-; LMULMAX2-RV32-NEXT:    and a1, a1, a5
+; LMULMAX2-RV32-NEXT:    addi a1, a5, -1
+; LMULMAX2-RV32-NEXT:    not a5, a5
+; LMULMAX2-RV32-NEXT:    and a1, a5, a1
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
 ; LMULMAX2-RV32-NEXT:    and a5, a5, a4
 ; LMULMAX2-RV32-NEXT:    sub a1, a1, a5
@@ -2590,10 +2590,11 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
 ; LMULMAX2-RV32-NEXT:    vslidedown.vi v25, v25, 1
 ; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v25
-; LMULMAX2-RV32-NEXT:    vsrl.vx v25, v25, a6
-; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v25
 ; LMULMAX2-RV32-NEXT:    bnez a5, .LBB3_5
 ; LMULMAX2-RV32-NEXT:  # %bb.4:
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vsrl.vx v25, v25, a6
+; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v25
 ; LMULMAX2-RV32-NEXT:    addi a5, a1, -1
 ; LMULMAX2-RV32-NEXT:    not a1, a1
 ; LMULMAX2-RV32-NEXT:    and a1, a1, a5
@@ -2719,9 +2720,6 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    sw zero, 12(sp)
 ; LMULMAX1-RV32-NEXT:    sw zero, 4(sp)
 ; LMULMAX1-RV32-NEXT:    addi a6, zero, 32
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vx v26, v25, a6
-; LMULMAX1-RV32-NEXT:    vmv.x.s a5, v26
 ; LMULMAX1-RV32-NEXT:    lui a1, 349525
 ; LMULMAX1-RV32-NEXT:    addi a4, a1, 1365
 ; LMULMAX1-RV32-NEXT:    lui a1, 209715
@@ -2729,13 +2727,16 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    lui a1, 61681
 ; LMULMAX1-RV32-NEXT:    addi a7, a1, -241
 ; LMULMAX1-RV32-NEXT:    lui a2, 4112
-; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v25
+; LMULMAX1-RV32-NEXT:    vmv.x.s a5, v25
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, 257
-; LMULMAX1-RV32-NEXT:    bnez a1, .LBB3_2
+; LMULMAX1-RV32-NEXT:    bnez a5, .LBB3_2
 ; LMULMAX1-RV32-NEXT:  # %bb.1:
-; LMULMAX1-RV32-NEXT:    addi a1, a5, -1
-; LMULMAX1-RV32-NEXT:    not a5, a5
-; LMULMAX1-RV32-NEXT:    and a1, a5, a1
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vsrl.vx v26, v25, a6
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v26
+; LMULMAX1-RV32-NEXT:    addi a5, a1, -1
+; LMULMAX1-RV32-NEXT:    not a1, a1
+; LMULMAX1-RV32-NEXT:    and a1, a1, a5
 ; LMULMAX1-RV32-NEXT:    srli a5, a1, 1
 ; LMULMAX1-RV32-NEXT:    and a5, a5, a4
 ; LMULMAX1-RV32-NEXT:    sub a1, a1, a5
@@ -2751,9 +2752,9 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    addi a5, a1, 32
 ; LMULMAX1-RV32-NEXT:    j .LBB3_3
 ; LMULMAX1-RV32-NEXT:  .LBB3_2:
-; LMULMAX1-RV32-NEXT:    addi a5, a1, -1
-; LMULMAX1-RV32-NEXT:    not a1, a1
-; LMULMAX1-RV32-NEXT:    and a1, a1, a5
+; LMULMAX1-RV32-NEXT:    addi a1, a5, -1
+; LMULMAX1-RV32-NEXT:    not a5, a5
+; LMULMAX1-RV32-NEXT:    and a1, a5, a1
 ; LMULMAX1-RV32-NEXT:    srli a5, a1, 1
 ; LMULMAX1-RV32-NEXT:    and a5, a5, a4
 ; LMULMAX1-RV32-NEXT:    sub a1, a1, a5
@@ -2771,10 +2772,11 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
 ; LMULMAX1-RV32-NEXT:    vslidedown.vi v25, v25, 1
 ; LMULMAX1-RV32-NEXT:    vmv.x.s a5, v25
-; LMULMAX1-RV32-NEXT:    vsrl.vx v25, v25, a6
-; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v25
 ; LMULMAX1-RV32-NEXT:    bnez a5, .LBB3_5
 ; LMULMAX1-RV32-NEXT:  # %bb.4:
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vsrl.vx v25, v25, a6
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v25
 ; LMULMAX1-RV32-NEXT:    addi a5, a1, -1
 ; LMULMAX1-RV32-NEXT:    not a1, a1
 ; LMULMAX1-RV32-NEXT:    and a1, a1, a5
@@ -7647,9 +7649,6 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    sw zero, 12(sp)
 ; LMULMAX2-RV32-NEXT:    sw zero, 4(sp)
 ; LMULMAX2-RV32-NEXT:    addi a6, zero, 32
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vx v28, v26, a6
-; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v28
 ; LMULMAX2-RV32-NEXT:    lui a1, 349525
 ; LMULMAX2-RV32-NEXT:    addi a4, a1, 1365
 ; LMULMAX2-RV32-NEXT:    lui a1, 209715
@@ -7657,13 +7656,16 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32-NEXT:    addi a7, a1, -241
 ; LMULMAX2-RV32-NEXT:    lui a2, 4112
-; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v26
+; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v26
 ; LMULMAX2-RV32-NEXT:    addi a2, a2, 257
-; LMULMAX2-RV32-NEXT:    bnez a1, .LBB7_2
+; LMULMAX2-RV32-NEXT:    bnez a5, .LBB7_2
 ; LMULMAX2-RV32-NEXT:  # %bb.1:
-; LMULMAX2-RV32-NEXT:    addi a1, a5, -1
-; LMULMAX2-RV32-NEXT:    not a5, a5
-; LMULMAX2-RV32-NEXT:    and a1, a5, a1
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vsrl.vx v28, v26, a6
+; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v28
+; LMULMAX2-RV32-NEXT:    addi a5, a1, -1
+; LMULMAX2-RV32-NEXT:    not a1, a1
+; LMULMAX2-RV32-NEXT:    and a1, a1, a5
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
 ; LMULMAX2-RV32-NEXT:    and a5, a5, a4
 ; LMULMAX2-RV32-NEXT:    sub a1, a1, a5
@@ -7679,9 +7681,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    addi a5, a1, 32
 ; LMULMAX2-RV32-NEXT:    j .LBB7_3
 ; LMULMAX2-RV32-NEXT:  .LBB7_2:
-; LMULMAX2-RV32-NEXT:    addi a5, a1, -1
-; LMULMAX2-RV32-NEXT:    not a1, a1
-; LMULMAX2-RV32-NEXT:    and a1, a1, a5
+; LMULMAX2-RV32-NEXT:    addi a1, a5, -1
+; LMULMAX2-RV32-NEXT:    not a5, a5
+; LMULMAX2-RV32-NEXT:    and a1, a5, a1
 ; LMULMAX2-RV32-NEXT:    srli a5, a1, 1
 ; LMULMAX2-RV32-NEXT:    and a5, a5, a4
 ; LMULMAX2-RV32-NEXT:    sub a1, a1, a5
@@ -7699,10 +7701,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; LMULMAX2-RV32-NEXT:    vslidedown.vi v28, v26, 3
 ; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v28
-; LMULMAX2-RV32-NEXT:    vsrl.vx v28, v28, a6
-; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v28
 ; LMULMAX2-RV32-NEXT:    bnez a5, .LBB7_5
 ; LMULMAX2-RV32-NEXT:  # %bb.4:
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vsrl.vx v28, v28, a6
+; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v28
 ; LMULMAX2-RV32-NEXT:    addi a5, a1, -1
 ; LMULMAX2-RV32-NEXT:    not a1, a1
 ; LMULMAX2-RV32-NEXT:    and a1, a1, a5
@@ -7741,10 +7744,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; LMULMAX2-RV32-NEXT:    vslidedown.vi v28, v26, 2
 ; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v28
-; LMULMAX2-RV32-NEXT:    vsrl.vx v28, v28, a6
-; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v28
 ; LMULMAX2-RV32-NEXT:    bnez a5, .LBB7_8
 ; LMULMAX2-RV32-NEXT:  # %bb.7:
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vsrl.vx v28, v28, a6
+; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v28
 ; LMULMAX2-RV32-NEXT:    addi a5, a1, -1
 ; LMULMAX2-RV32-NEXT:    not a1, a1
 ; LMULMAX2-RV32-NEXT:    and a1, a1, a5
@@ -7783,10 +7787,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; LMULMAX2-RV32-NEXT:    vslidedown.vi v26, v26, 1
 ; LMULMAX2-RV32-NEXT:    vmv.x.s a5, v26
-; LMULMAX2-RV32-NEXT:    vsrl.vx v26, v26, a6
-; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v26
 ; LMULMAX2-RV32-NEXT:    bnez a5, .LBB7_11
 ; LMULMAX2-RV32-NEXT:  # %bb.10:
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vsrl.vx v26, v26, a6
+; LMULMAX2-RV32-NEXT:    vmv.x.s a1, v26
 ; LMULMAX2-RV32-NEXT:    addi a5, a1, -1
 ; LMULMAX2-RV32-NEXT:    not a1, a1
 ; LMULMAX2-RV32-NEXT:    and a1, a1, a5
@@ -7962,25 +7967,25 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    .cfi_def_cfa_offset 32
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX1-RV32-NEXT:    vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    addi a6, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v26, (a6)
+; LMULMAX1-RV32-NEXT:    addi a7, a0, 16
+; LMULMAX1-RV32-NEXT:    vle64.v v26, (a7)
 ; LMULMAX1-RV32-NEXT:    sw zero, 28(sp)
 ; LMULMAX1-RV32-NEXT:    sw zero, 20(sp)
-; LMULMAX1-RV32-NEXT:    addi a7, zero, 32
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vx v27, v26, a7
-; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v27
-; LMULMAX1-RV32-NEXT:    lui a2, 349525
-; LMULMAX1-RV32-NEXT:    addi a5, a2, 1365
-; LMULMAX1-RV32-NEXT:    lui a2, 209715
-; LMULMAX1-RV32-NEXT:    addi a4, a2, 819
-; LMULMAX1-RV32-NEXT:    lui a2, 61681
-; LMULMAX1-RV32-NEXT:    addi t0, a2, -241
+; LMULMAX1-RV32-NEXT:    addi a6, zero, 32
+; LMULMAX1-RV32-NEXT:    lui a1, 349525
+; LMULMAX1-RV32-NEXT:    addi a5, a1, 1365
+; LMULMAX1-RV32-NEXT:    lui a1, 209715
+; LMULMAX1-RV32-NEXT:    addi a4, a1, 819
+; LMULMAX1-RV32-NEXT:    lui a1, 61681
+; LMULMAX1-RV32-NEXT:    addi t0, a1, -241
 ; LMULMAX1-RV32-NEXT:    lui a3, 4112
-; LMULMAX1-RV32-NEXT:    vmv.x.s a2, v26
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v26
 ; LMULMAX1-RV32-NEXT:    addi a3, a3, 257
-; LMULMAX1-RV32-NEXT:    bnez a2, .LBB7_2
+; LMULMAX1-RV32-NEXT:    bnez a1, .LBB7_2
 ; LMULMAX1-RV32-NEXT:  # %bb.1:
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vsrl.vx v27, v26, a6
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v27
 ; LMULMAX1-RV32-NEXT:    addi a2, a1, -1
 ; LMULMAX1-RV32-NEXT:    not a1, a1
 ; LMULMAX1-RV32-NEXT:    and a1, a1, a2
@@ -7999,9 +8004,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, 32
 ; LMULMAX1-RV32-NEXT:    j .LBB7_3
 ; LMULMAX1-RV32-NEXT:  .LBB7_2:
-; LMULMAX1-RV32-NEXT:    addi a1, a2, -1
-; LMULMAX1-RV32-NEXT:    not a2, a2
-; LMULMAX1-RV32-NEXT:    and a1, a2, a1
+; LMULMAX1-RV32-NEXT:    addi a2, a1, -1
+; LMULMAX1-RV32-NEXT:    not a1, a1
+; LMULMAX1-RV32-NEXT:    and a1, a1, a2
 ; LMULMAX1-RV32-NEXT:    srli a2, a1, 1
 ; LMULMAX1-RV32-NEXT:    and a2, a2, a5
 ; LMULMAX1-RV32-NEXT:    sub a1, a1, a2
@@ -8019,13 +8024,14 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
 ; LMULMAX1-RV32-NEXT:    vslidedown.vi v26, v26, 1
 ; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v26
-; LMULMAX1-RV32-NEXT:    vsrl.vx v26, v26, a7
-; LMULMAX1-RV32-NEXT:    vmv.x.s a2, v26
 ; LMULMAX1-RV32-NEXT:    bnez a1, .LBB7_5
 ; LMULMAX1-RV32-NEXT:  # %bb.4:
-; LMULMAX1-RV32-NEXT:    addi a1, a2, -1
-; LMULMAX1-RV32-NEXT:    not a2, a2
-; LMULMAX1-RV32-NEXT:    and a1, a2, a1
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vsrl.vx v26, v26, a6
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v26
+; LMULMAX1-RV32-NEXT:    addi a2, a1, -1
+; LMULMAX1-RV32-NEXT:    not a1, a1
+; LMULMAX1-RV32-NEXT:    and a1, a1, a2
 ; LMULMAX1-RV32-NEXT:    srli a2, a1, 1
 ; LMULMAX1-RV32-NEXT:    and a2, a2, a5
 ; LMULMAX1-RV32-NEXT:    sub a1, a1, a2
@@ -8060,15 +8066,16 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    sw a1, 24(sp)
 ; LMULMAX1-RV32-NEXT:    sw zero, 12(sp)
 ; LMULMAX1-RV32-NEXT:    sw zero, 4(sp)
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vx v26, v25, a7
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v25
-; LMULMAX1-RV32-NEXT:    vmv.x.s a2, v26
 ; LMULMAX1-RV32-NEXT:    bnez a1, .LBB7_8
 ; LMULMAX1-RV32-NEXT:  # %bb.7:
-; LMULMAX1-RV32-NEXT:    addi a1, a2, -1
-; LMULMAX1-RV32-NEXT:    not a2, a2
-; LMULMAX1-RV32-NEXT:    and a1, a2, a1
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vsrl.vx v26, v25, a6
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v26
+; LMULMAX1-RV32-NEXT:    addi a2, a1, -1
+; LMULMAX1-RV32-NEXT:    not a1, a1
+; LMULMAX1-RV32-NEXT:    and a1, a1, a2
 ; LMULMAX1-RV32-NEXT:    srli a2, a1, 1
 ; LMULMAX1-RV32-NEXT:    and a2, a2, a5
 ; LMULMAX1-RV32-NEXT:    sub a1, a1, a2
@@ -8104,13 +8111,14 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
 ; LMULMAX1-RV32-NEXT:    vslidedown.vi v25, v25, 1
 ; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v25
-; LMULMAX1-RV32-NEXT:    vsrl.vx v25, v25, a7
-; LMULMAX1-RV32-NEXT:    vmv.x.s a2, v25
 ; LMULMAX1-RV32-NEXT:    bnez a1, .LBB7_11
 ; LMULMAX1-RV32-NEXT:  # %bb.10:
-; LMULMAX1-RV32-NEXT:    addi a1, a2, -1
-; LMULMAX1-RV32-NEXT:    not a2, a2
-; LMULMAX1-RV32-NEXT:    and a1, a2, a1
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vsrl.vx v25, v25, a6
+; LMULMAX1-RV32-NEXT:    vmv.x.s a1, v25
+; LMULMAX1-RV32-NEXT:    addi a2, a1, -1
+; LMULMAX1-RV32-NEXT:    not a1, a1
+; LMULMAX1-RV32-NEXT:    and a1, a1, a2
 ; LMULMAX1-RV32-NEXT:    srli a2, a1, 1
 ; LMULMAX1-RV32-NEXT:    and a2, a2, a5
 ; LMULMAX1-RV32-NEXT:    sub a1, a1, a2
@@ -8149,7 +8157,7 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vle32.v v26, (a1)
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX1-RV32-NEXT:    vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v26, (a6)
+; LMULMAX1-RV32-NEXT:    vse64.v v26, (a7)
 ; LMULMAX1-RV32-NEXT:    addi sp, sp, 32
 ; LMULMAX1-RV32-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index 5d2079004d6df..d878265d70cc9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -26,7 +26,6 @@ define half @vreduce_ord_fadd_v1f16(<1 x half>* %x, half %s) {
 ; CHECK-NEXT:    vfmv.v.f v26, fa0
 ; CHECK-NEXT:    vsetivli a0, 1, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <1 x half>, <1 x half>* %x
@@ -45,7 +44,6 @@ define half @vreduce_fadd_v2f16(<2 x half>* %x, half %s) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -63,7 +61,6 @@ define half @vreduce_ord_fadd_v2f16(<2 x half>* %x, half %s) {
 ; CHECK-NEXT:    vfmv.v.f v26, fa0
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x half>, <2 x half>* %x
@@ -82,7 +79,6 @@ define half @vreduce_fadd_v4f16(<4 x half>* %x, half %s) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -100,7 +96,6 @@ define half @vreduce_ord_fadd_v4f16(<4 x half>* %x, half %s) {
 ; CHECK-NEXT:    vfmv.v.f v26, fa0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x half>, <4 x half>* %x
@@ -154,7 +149,6 @@ define half @vreduce_fadd_v16f16(<16 x half>* %x, half %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -172,7 +166,6 @@ define half @vreduce_ord_fadd_v16f16(<16 x half>* %x, half %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x half>, <16 x half>* %x
@@ -192,7 +185,6 @@ define half @vreduce_fadd_v32f16(<32 x half>* %x, half %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -211,7 +203,6 @@ define half @vreduce_ord_fadd_v32f16(<32 x half>* %x, half %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x half>, <32 x half>* %x
@@ -231,7 +222,6 @@ define half @vreduce_fadd_v64f16(<64 x half>* %x, half %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -250,7 +240,6 @@ define half @vreduce_ord_fadd_v64f16(<64 x half>* %x, half %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x half>, <64 x half>* %x
@@ -273,7 +262,6 @@ define half @vreduce_fadd_v128f16(<128 x half>* %x, half %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -294,13 +282,11 @@ define half @vreduce_ord_fadd_v128f16(<128 x half>* %x, half %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetvli a0, a2, e16,m8,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v16, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, a2, e16,m8,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x half>, <128 x half>* %x
@@ -332,7 +318,6 @@ define float @vreduce_ord_fadd_v1f32(<1 x float>* %x, float %s) {
 ; CHECK-NEXT:    vfmv.v.f v26, fa0
 ; CHECK-NEXT:    vsetivli a0, 1, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <1 x float>, <1 x float>* %x
@@ -351,7 +336,6 @@ define float @vreduce_fadd_v2f32(<2 x float>* %x, float %s) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -369,7 +353,6 @@ define float @vreduce_ord_fadd_v2f32(<2 x float>* %x, float %s) {
 ; CHECK-NEXT:    vfmv.v.f v26, fa0
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, <2 x float>* %x
@@ -423,7 +406,6 @@ define float @vreduce_fadd_v8f32(<8 x float>* %x, float %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -441,7 +423,6 @@ define float @vreduce_ord_fadd_v8f32(<8 x float>* %x, float %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x float>, <8 x float>* %x
@@ -460,7 +441,6 @@ define float @vreduce_fadd_v16f32(<16 x float>* %x, float %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -478,7 +458,6 @@ define float @vreduce_ord_fadd_v16f32(<16 x float>* %x, float %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x float>, <16 x float>* %x
@@ -498,7 +477,6 @@ define float @vreduce_fadd_v32f32(<32 x float>* %x, float %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -517,7 +495,6 @@ define float @vreduce_ord_fadd_v32f32(<32 x float>* %x, float %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x float>, <32 x float>* %x
@@ -540,7 +517,6 @@ define float @vreduce_fadd_v64f32(<64 x float>* %x, float %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -561,13 +537,11 @@ define float @vreduce_ord_fadd_v64f32(<64 x float>* %x, float %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetvli a0, a2, e32,m8,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v16, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, a2, e32,m8,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x float>, <64 x float>* %x
@@ -652,7 +626,6 @@ define double @vreduce_fadd_v4f64(<4 x double>* %x, double %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.d fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -670,7 +643,6 @@ define double @vreduce_ord_fadd_v4f64(<4 x double>* %x, double %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x double>, <4 x double>* %x
@@ -689,7 +661,6 @@ define double @vreduce_fadd_v8f64(<8 x double>* %x, double %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.d fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -707,7 +678,6 @@ define double @vreduce_ord_fadd_v8f64(<8 x double>* %x, double %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x double>, <8 x double>* %x
@@ -726,7 +696,6 @@ define double @vreduce_fadd_v16f64(<16 x double>* %x, double %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.d fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -744,7 +713,6 @@ define double @vreduce_ord_fadd_v16f64(<16 x double>* %x, double %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x double>, <16 x double>* %x
@@ -766,7 +734,6 @@ define double @vreduce_fadd_v32f64(<32 x double>* %x, double %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.d fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -786,13 +753,11 @@ define double @vreduce_ord_fadd_v32f64(<32 x double>* %x, double %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v16, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x double>, <32 x double>* %x
@@ -813,7 +778,6 @@ define half @vreduce_fmin_v2f16(<2 x half>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v26, ft0
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x half>, <2 x half>* %x
@@ -834,7 +798,6 @@ define half @vreduce_fmin_v4f16(<4 x half>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v26, ft0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x half>, <4 x half>* %x
@@ -853,7 +816,6 @@ define half @vreduce_fmin_v4f16_nonans(<4 x half>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v26, ft0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x half>, <4 x half>* %x
@@ -872,7 +834,6 @@ define half @vreduce_fmin_v4f16_nonans_noinfs(<4 x half>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v26, ft0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x half>, <4 x half>* %x
@@ -897,7 +858,6 @@ define half @vreduce_fmin_v128f16(<128 x half>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x half>, <128 x half>* %x
@@ -918,7 +878,6 @@ define float @vreduce_fmin_v2f32(<2 x float>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v26, ft0
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, <2 x float>* %x
@@ -1005,7 +964,6 @@ define float @vreduce_fmin_v128f32(<128 x float>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x float>, <128 x float>* %x
@@ -1046,7 +1004,6 @@ define double @vreduce_fmin_v4f64(<4 x double>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x double>, <4 x double>* %x
@@ -1065,7 +1022,6 @@ define double @vreduce_fmin_v4f64_nonans(<4 x double>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x double>, <4 x double>* %x
@@ -1084,7 +1040,6 @@ define double @vreduce_fmin_v4f64_nonans_noinfs(<4 x double>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x double>, <4 x double>* %x
@@ -1108,7 +1063,6 @@ define double @vreduce_fmin_v32f64(<32 x double>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x double>, <32 x double>* %x
@@ -1129,7 +1083,6 @@ define half @vreduce_fmax_v2f16(<2 x half>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v26, ft0
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x half>, <2 x half>* %x
@@ -1150,7 +1103,6 @@ define half @vreduce_fmax_v4f16(<4 x half>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v26, ft0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x half>, <4 x half>* %x
@@ -1169,7 +1121,6 @@ define half @vreduce_fmax_v4f16_nonans(<4 x half>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v26, ft0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x half>, <4 x half>* %x
@@ -1188,7 +1139,6 @@ define half @vreduce_fmax_v4f16_nonans_noinfs(<4 x half>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v26, ft0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x half>, <4 x half>* %x
@@ -1213,7 +1163,6 @@ define half @vreduce_fmax_v128f16(<128 x half>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x half>, <128 x half>* %x
@@ -1234,7 +1183,6 @@ define float @vreduce_fmax_v2f32(<2 x float>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v26, ft0
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, <2 x float>* %x
@@ -1321,7 +1269,6 @@ define float @vreduce_fmax_v128f32(<128 x float>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x float>, <128 x float>* %x
@@ -1362,7 +1309,6 @@ define double @vreduce_fmax_v4f64(<4 x double>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x double>, <4 x double>* %x
@@ -1381,7 +1327,6 @@ define double @vreduce_fmax_v4f64_nonans(<4 x double>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x double>, <4 x double>* %x
@@ -1400,7 +1345,6 @@ define double @vreduce_fmax_v4f64_nonans_noinfs(<4 x double>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x double>, <4 x double>* %x
@@ -1424,7 +1368,6 @@ define double @vreduce_fmax_v32f64(<32 x double>* %x) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x double>, <32 x double>* %x

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
index 5044c9d605ec7..cc1851a977e61 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -27,7 +27,6 @@ define i8 @vreduce_add_v2i8(<2 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i8>, <2 x i8>* %x
@@ -46,7 +45,6 @@ define i8 @vreduce_add_v4i8(<4 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i8>, <4 x i8>* %x
@@ -65,7 +63,6 @@ define i8 @vreduce_add_v8i8(<8 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i8>, <8 x i8>* %x
@@ -103,7 +100,6 @@ define i8 @vreduce_add_v32i8(<32 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i8>, <32 x i8>* %x
@@ -123,7 +119,6 @@ define i8 @vreduce_add_v64i8(<64 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m4,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i8>, <64 x i8>* %x
@@ -143,7 +138,6 @@ define i8 @vreduce_add_v128i8(<128 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i8>, <128 x i8>* %x
@@ -166,7 +160,6 @@ define i8 @vreduce_add_v256i8(<256 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <256 x i8>, <256 x i8>* %x
@@ -199,7 +192,6 @@ define i16 @vreduce_add_v2i16(<2 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i16>, <2 x i16>* %x
@@ -218,7 +210,6 @@ define i16 @vreduce_add_v4i16(<4 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i16>, <4 x i16>* %x
@@ -255,7 +246,6 @@ define i16 @vreduce_add_v16i16(<16 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i16>, <16 x i16>* %x
@@ -275,7 +265,6 @@ define i16 @vreduce_add_v32i16(<32 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i16>, <32 x i16>* %x
@@ -295,7 +284,6 @@ define i16 @vreduce_add_v64i16(<64 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i16>, <64 x i16>* %x
@@ -318,7 +306,6 @@ define i16 @vreduce_add_v128i16(<128 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i16>, <128 x i16>* %x
@@ -351,7 +338,6 @@ define i32 @vreduce_add_v2i32(<2 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i32>, <2 x i32>* %x
@@ -388,7 +374,6 @@ define i32 @vreduce_add_v8i32(<8 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i32>, <8 x i32>* %x
@@ -407,7 +392,6 @@ define i32 @vreduce_add_v16i32(<16 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i32>, <16 x i32>* %x
@@ -427,7 +411,6 @@ define i32 @vreduce_add_v32i32(<32 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i32>, <32 x i32>* %x
@@ -450,7 +433,6 @@ define i32 @vreduce_add_v64i32(<64 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i32>, <64 x i32>* %x
@@ -526,7 +508,6 @@ define i64 @vreduce_add_v4i64(<4 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vredsum.vs v25, v26, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -542,7 +523,6 @@ define i64 @vreduce_add_v4i64(<4 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vredsum.vs v25, v26, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <4 x i64>, <4 x i64>* %x
@@ -561,7 +541,6 @@ define i64 @vreduce_add_v8i64(<8 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vredsum.vs v25, v28, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -577,7 +556,6 @@ define i64 @vreduce_add_v8i64(<8 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vredsum.vs v25, v28, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <8 x i64>, <8 x i64>* %x
@@ -596,7 +574,6 @@ define i64 @vreduce_add_v16i64(<16 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredsum.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -612,7 +589,6 @@ define i64 @vreduce_add_v16i64(<16 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredsum.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <16 x i64>, <16 x i64>* %x
@@ -634,7 +610,6 @@ define i64 @vreduce_add_v32i64(<32 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredsum.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -653,7 +628,6 @@ define i64 @vreduce_add_v32i64(<32 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredsum.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <32 x i64>, <32 x i64>* %x
@@ -681,7 +655,6 @@ define i64 @vreduce_add_v64i64(<64 x i64>* %x) nounwind {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredsum.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -706,7 +679,6 @@ define i64 @vreduce_add_v64i64(<64 x i64>* %x) nounwind {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredsum.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <64 x i64>, <64 x i64>* %x
@@ -739,7 +711,6 @@ define i8 @vreduce_and_v2i8(<2 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, -1
 ; CHECK-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i8>, <2 x i8>* %x
@@ -758,7 +729,6 @@ define i8 @vreduce_and_v4i8(<4 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, -1
 ; CHECK-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i8>, <4 x i8>* %x
@@ -777,7 +747,6 @@ define i8 @vreduce_and_v8i8(<8 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, -1
 ; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i8>, <8 x i8>* %x
@@ -815,7 +784,6 @@ define i8 @vreduce_and_v32i8(<32 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i8>, <32 x i8>* %x
@@ -835,7 +803,6 @@ define i8 @vreduce_and_v64i8(<64 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m4,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i8>, <64 x i8>* %x
@@ -855,7 +822,6 @@ define i8 @vreduce_and_v128i8(<128 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i8>, <128 x i8>* %x
@@ -878,7 +844,6 @@ define i8 @vreduce_and_v256i8(<256 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <256 x i8>, <256 x i8>* %x
@@ -911,7 +876,6 @@ define i16 @vreduce_and_v2i16(<2 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, -1
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i16>, <2 x i16>* %x
@@ -930,7 +894,6 @@ define i16 @vreduce_and_v4i16(<4 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, -1
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i16>, <4 x i16>* %x
@@ -967,7 +930,6 @@ define i16 @vreduce_and_v16i16(<16 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i16>, <16 x i16>* %x
@@ -987,7 +949,6 @@ define i16 @vreduce_and_v32i16(<32 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i16>, <32 x i16>* %x
@@ -1007,7 +968,6 @@ define i16 @vreduce_and_v64i16(<64 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i16>, <64 x i16>* %x
@@ -1030,7 +990,6 @@ define i16 @vreduce_and_v128i16(<128 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i16>, <128 x i16>* %x
@@ -1063,7 +1022,6 @@ define i32 @vreduce_and_v2i32(<2 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, -1
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i32>, <2 x i32>* %x
@@ -1100,7 +1058,6 @@ define i32 @vreduce_and_v8i32(<8 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i32>, <8 x i32>* %x
@@ -1119,7 +1076,6 @@ define i32 @vreduce_and_v16i32(<16 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i32>, <16 x i32>* %x
@@ -1139,7 +1095,6 @@ define i32 @vreduce_and_v32i32(<32 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i32>, <32 x i32>* %x
@@ -1162,7 +1117,6 @@ define i32 @vreduce_and_v64i32(<64 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i32>, <64 x i32>* %x
@@ -1238,7 +1192,6 @@ define i64 @vreduce_and_v4i64(<4 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, -1
 ; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vredand.vs v25, v26, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1254,7 +1207,6 @@ define i64 @vreduce_and_v4i64(<4 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, -1
 ; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vredand.vs v25, v26, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <4 x i64>, <4 x i64>* %x
@@ -1273,7 +1225,6 @@ define i64 @vreduce_and_v8i64(<8 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, -1
 ; RV32-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vredand.vs v25, v28, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1289,7 +1240,6 @@ define i64 @vreduce_and_v8i64(<8 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, -1
 ; RV64-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vredand.vs v25, v28, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <8 x i64>, <8 x i64>* %x
@@ -1308,7 +1258,6 @@ define i64 @vreduce_and_v16i64(<16 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, -1
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredand.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1324,7 +1273,6 @@ define i64 @vreduce_and_v16i64(<16 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, -1
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredand.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <16 x i64>, <16 x i64>* %x
@@ -1346,7 +1294,6 @@ define i64 @vreduce_and_v32i64(<32 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, -1
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredand.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1365,7 +1312,6 @@ define i64 @vreduce_and_v32i64(<32 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, -1
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredand.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <32 x i64>, <32 x i64>* %x
@@ -1393,7 +1339,6 @@ define i64 @vreduce_and_v64i64(<64 x i64>* %x) nounwind {
 ; RV32-NEXT:    vmv.v.i v25, -1
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredand.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1418,7 +1363,6 @@ define i64 @vreduce_and_v64i64(<64 x i64>* %x) nounwind {
 ; RV64-NEXT:    vmv.v.i v25, -1
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredand.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <64 x i64>, <64 x i64>* %x
@@ -1451,7 +1395,6 @@ define i8 @vreduce_or_v2i8(<2 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i8>, <2 x i8>* %x
@@ -1470,7 +1413,6 @@ define i8 @vreduce_or_v4i8(<4 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i8>, <4 x i8>* %x
@@ -1489,7 +1431,6 @@ define i8 @vreduce_or_v8i8(<8 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i8>, <8 x i8>* %x
@@ -1527,7 +1468,6 @@ define i8 @vreduce_or_v32i8(<32 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i8>, <32 x i8>* %x
@@ -1547,7 +1487,6 @@ define i8 @vreduce_or_v64i8(<64 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m4,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i8>, <64 x i8>* %x
@@ -1567,7 +1506,6 @@ define i8 @vreduce_or_v128i8(<128 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i8>, <128 x i8>* %x
@@ -1590,7 +1528,6 @@ define i8 @vreduce_or_v256i8(<256 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <256 x i8>, <256 x i8>* %x
@@ -1623,7 +1560,6 @@ define i16 @vreduce_or_v2i16(<2 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i16>, <2 x i16>* %x
@@ -1642,7 +1578,6 @@ define i16 @vreduce_or_v4i16(<4 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i16>, <4 x i16>* %x
@@ -1679,7 +1614,6 @@ define i16 @vreduce_or_v16i16(<16 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i16>, <16 x i16>* %x
@@ -1699,7 +1633,6 @@ define i16 @vreduce_or_v32i16(<32 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i16>, <32 x i16>* %x
@@ -1719,7 +1652,6 @@ define i16 @vreduce_or_v64i16(<64 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i16>, <64 x i16>* %x
@@ -1742,7 +1674,6 @@ define i16 @vreduce_or_v128i16(<128 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i16>, <128 x i16>* %x
@@ -1775,7 +1706,6 @@ define i32 @vreduce_or_v2i32(<2 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i32>, <2 x i32>* %x
@@ -1812,7 +1742,6 @@ define i32 @vreduce_or_v8i32(<8 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i32>, <8 x i32>* %x
@@ -1831,7 +1760,6 @@ define i32 @vreduce_or_v16i32(<16 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i32>, <16 x i32>* %x
@@ -1851,7 +1779,6 @@ define i32 @vreduce_or_v32i32(<32 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i32>, <32 x i32>* %x
@@ -1874,7 +1801,6 @@ define i32 @vreduce_or_v64i32(<64 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i32>, <64 x i32>* %x
@@ -1950,7 +1876,6 @@ define i64 @vreduce_or_v4i64(<4 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vredor.vs v25, v26, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1966,7 +1891,6 @@ define i64 @vreduce_or_v4i64(<4 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vredor.vs v25, v26, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <4 x i64>, <4 x i64>* %x
@@ -1985,7 +1909,6 @@ define i64 @vreduce_or_v8i64(<8 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vredor.vs v25, v28, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -2001,7 +1924,6 @@ define i64 @vreduce_or_v8i64(<8 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vredor.vs v25, v28, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <8 x i64>, <8 x i64>* %x
@@ -2020,7 +1942,6 @@ define i64 @vreduce_or_v16i64(<16 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredor.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -2036,7 +1957,6 @@ define i64 @vreduce_or_v16i64(<16 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredor.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <16 x i64>, <16 x i64>* %x
@@ -2058,7 +1978,6 @@ define i64 @vreduce_or_v32i64(<32 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredor.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -2077,7 +1996,6 @@ define i64 @vreduce_or_v32i64(<32 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredor.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <32 x i64>, <32 x i64>* %x
@@ -2105,7 +2023,6 @@ define i64 @vreduce_or_v64i64(<64 x i64>* %x) nounwind {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredor.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -2130,7 +2047,6 @@ define i64 @vreduce_or_v64i64(<64 x i64>* %x) nounwind {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredor.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <64 x i64>, <64 x i64>* %x
@@ -2163,7 +2079,6 @@ define i8 @vreduce_xor_v2i8(<2 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i8>, <2 x i8>* %x
@@ -2182,7 +2097,6 @@ define i8 @vreduce_xor_v4i8(<4 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i8>, <4 x i8>* %x
@@ -2201,7 +2115,6 @@ define i8 @vreduce_xor_v8i8(<8 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i8>, <8 x i8>* %x
@@ -2239,7 +2152,6 @@ define i8 @vreduce_xor_v32i8(<32 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i8>, <32 x i8>* %x
@@ -2259,7 +2171,6 @@ define i8 @vreduce_xor_v64i8(<64 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m4,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i8>, <64 x i8>* %x
@@ -2279,7 +2190,6 @@ define i8 @vreduce_xor_v128i8(<128 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i8>, <128 x i8>* %x
@@ -2302,7 +2212,6 @@ define i8 @vreduce_xor_v256i8(<256 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <256 x i8>, <256 x i8>* %x
@@ -2335,7 +2244,6 @@ define i16 @vreduce_xor_v2i16(<2 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i16>, <2 x i16>* %x
@@ -2354,7 +2262,6 @@ define i16 @vreduce_xor_v4i16(<4 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i16>, <4 x i16>* %x
@@ -2391,7 +2298,6 @@ define i16 @vreduce_xor_v16i16(<16 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i16>, <16 x i16>* %x
@@ -2411,7 +2317,6 @@ define i16 @vreduce_xor_v32i16(<32 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i16>, <32 x i16>* %x
@@ -2431,7 +2336,6 @@ define i16 @vreduce_xor_v64i16(<64 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i16>, <64 x i16>* %x
@@ -2454,7 +2358,6 @@ define i16 @vreduce_xor_v128i16(<128 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i16>, <128 x i16>* %x
@@ -2487,7 +2390,6 @@ define i32 @vreduce_xor_v2i32(<2 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i32>, <2 x i32>* %x
@@ -2524,7 +2426,6 @@ define i32 @vreduce_xor_v8i32(<8 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i32>, <8 x i32>* %x
@@ -2543,7 +2444,6 @@ define i32 @vreduce_xor_v16i32(<16 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i32>, <16 x i32>* %x
@@ -2563,7 +2463,6 @@ define i32 @vreduce_xor_v32i32(<32 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i32>, <32 x i32>* %x
@@ -2586,7 +2485,6 @@ define i32 @vreduce_xor_v64i32(<64 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i32>, <64 x i32>* %x
@@ -2662,7 +2560,6 @@ define i64 @vreduce_xor_v4i64(<4 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vredxor.vs v25, v26, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -2678,7 +2575,6 @@ define i64 @vreduce_xor_v4i64(<4 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vredxor.vs v25, v26, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <4 x i64>, <4 x i64>* %x
@@ -2697,7 +2593,6 @@ define i64 @vreduce_xor_v8i64(<8 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vredxor.vs v25, v28, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -2713,7 +2608,6 @@ define i64 @vreduce_xor_v8i64(<8 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vredxor.vs v25, v28, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <8 x i64>, <8 x i64>* %x
@@ -2732,7 +2626,6 @@ define i64 @vreduce_xor_v16i64(<16 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredxor.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -2748,7 +2641,6 @@ define i64 @vreduce_xor_v16i64(<16 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredxor.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <16 x i64>, <16 x i64>* %x
@@ -2770,7 +2662,6 @@ define i64 @vreduce_xor_v32i64(<32 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredxor.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -2789,7 +2680,6 @@ define i64 @vreduce_xor_v32i64(<32 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredxor.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <32 x i64>, <32 x i64>* %x
@@ -2817,7 +2707,6 @@ define i64 @vreduce_xor_v64i64(<64 x i64>* %x) nounwind {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredxor.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -2842,7 +2731,6 @@ define i64 @vreduce_xor_v64i64(<64 x i64>* %x) nounwind {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredxor.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <64 x i64>, <64 x i64>* %x
@@ -2876,7 +2764,6 @@ define i8 @vreduce_smin_v2i8(<2 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v26, a0
 ; CHECK-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i8>, <2 x i8>* %x
@@ -2896,7 +2783,6 @@ define i8 @vreduce_smin_v4i8(<4 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v26, a0
 ; CHECK-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i8>, <4 x i8>* %x
@@ -2916,7 +2802,6 @@ define i8 @vreduce_smin_v8i8(<8 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v26, a0
 ; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i8>, <8 x i8>* %x
@@ -2956,7 +2841,6 @@ define i8 @vreduce_smin_v32i8(<32 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m2,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i8>, <32 x i8>* %x
@@ -2977,7 +2861,6 @@ define i8 @vreduce_smin_v64i8(<64 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m4,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i8>, <64 x i8>* %x
@@ -2998,7 +2881,6 @@ define i8 @vreduce_smin_v128i8(<128 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i8>, <128 x i8>* %x
@@ -3022,7 +2904,6 @@ define i8 @vreduce_smin_v256i8(<256 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <256 x i8>, <256 x i8>* %x
@@ -3057,7 +2938,6 @@ define i16 @vreduce_smin_v2i16(<2 x i16>* %x) {
 ; RV32-NEXT:    vmv.v.x v26, a0
 ; RV32-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v25, v26
-; RV32-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    ret
 ;
@@ -3071,7 +2951,6 @@ define i16 @vreduce_smin_v2i16(<2 x i16>* %x) {
 ; RV64-NEXT:    vmv.v.x v26, a0
 ; RV64-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v25, v26
-; RV64-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <2 x i16>, <2 x i16>* %x
@@ -3092,7 +2971,6 @@ define i16 @vreduce_smin_v4i16(<4 x i16>* %x) {
 ; RV32-NEXT:    vmv.v.x v26, a0
 ; RV32-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v25, v26
-; RV32-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    ret
 ;
@@ -3106,7 +2984,6 @@ define i16 @vreduce_smin_v4i16(<4 x i16>* %x) {
 ; RV64-NEXT:    vmv.v.x v26, a0
 ; RV64-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v25, v26
-; RV64-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <4 x i16>, <4 x i16>* %x
@@ -3160,7 +3037,6 @@ define i16 @vreduce_smin_v16i16(<16 x i16>* %x) {
 ; RV32-NEXT:    vmv.v.x v25, a0
 ; RV32-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v26, v25
-; RV32-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    ret
 ;
@@ -3174,7 +3050,6 @@ define i16 @vreduce_smin_v16i16(<16 x i16>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v26, v25
-; RV64-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <16 x i16>, <16 x i16>* %x
@@ -3196,7 +3071,6 @@ define i16 @vreduce_smin_v32i16(<32 x i16>* %x) {
 ; RV32-NEXT:    vmv.v.x v25, a0
 ; RV32-NEXT:    vsetvli a0, a1, e16,m4,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v28, v25
-; RV32-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    ret
 ;
@@ -3211,7 +3085,6 @@ define i16 @vreduce_smin_v32i16(<32 x i16>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetvli a0, a1, e16,m4,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v28, v25
-; RV64-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <32 x i16>, <32 x i16>* %x
@@ -3233,7 +3106,6 @@ define i16 @vreduce_smin_v64i16(<64 x i16>* %x) {
 ; RV32-NEXT:    vmv.v.x v25, a0
 ; RV32-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    ret
 ;
@@ -3248,7 +3120,6 @@ define i16 @vreduce_smin_v64i16(<64 x i16>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <64 x i16>, <64 x i16>* %x
@@ -3273,7 +3144,6 @@ define i16 @vreduce_smin_v128i16(<128 x i16>* %x) {
 ; RV32-NEXT:    vmv.v.x v25, a0
 ; RV32-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    ret
 ;
@@ -3291,7 +3161,6 @@ define i16 @vreduce_smin_v128i16(<128 x i16>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <128 x i16>, <128 x i16>* %x
@@ -3326,7 +3195,6 @@ define i32 @vreduce_smin_v2i32(<2 x i32>* %x) {
 ; RV32-NEXT:    vmv.v.x v26, a0
 ; RV32-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v25, v26
-; RV32-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    ret
 ;
@@ -3340,7 +3208,6 @@ define i32 @vreduce_smin_v2i32(<2 x i32>* %x) {
 ; RV64-NEXT:    vmv.v.x v26, a0
 ; RV64-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v25, v26
-; RV64-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <2 x i32>, <2 x i32>* %x
@@ -3394,7 +3261,6 @@ define i32 @vreduce_smin_v8i32(<8 x i32>* %x) {
 ; RV32-NEXT:    vmv.v.x v25, a0
 ; RV32-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v26, v25
-; RV32-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    ret
 ;
@@ -3408,7 +3274,6 @@ define i32 @vreduce_smin_v8i32(<8 x i32>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v26, v25
-; RV64-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <8 x i32>, <8 x i32>* %x
@@ -3429,7 +3294,6 @@ define i32 @vreduce_smin_v16i32(<16 x i32>* %x) {
 ; RV32-NEXT:    vmv.v.x v25, a0
 ; RV32-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v28, v25
-; RV32-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    ret
 ;
@@ -3443,7 +3307,6 @@ define i32 @vreduce_smin_v16i32(<16 x i32>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v28, v25
-; RV64-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <16 x i32>, <16 x i32>* %x
@@ -3465,7 +3328,6 @@ define i32 @vreduce_smin_v32i32(<32 x i32>* %x) {
 ; RV32-NEXT:    vmv.v.x v25, a0
 ; RV32-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    ret
 ;
@@ -3480,7 +3342,6 @@ define i32 @vreduce_smin_v32i32(<32 x i32>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <32 x i32>, <32 x i32>* %x
@@ -3505,7 +3366,6 @@ define i32 @vreduce_smin_v64i32(<64 x i32>* %x) {
 ; RV32-NEXT:    vmv.v.x v25, a0
 ; RV32-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    ret
 ;
@@ -3523,7 +3383,6 @@ define i32 @vreduce_smin_v64i32(<64 x i32>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <64 x i32>, <64 x i32>* %x
@@ -3618,7 +3477,6 @@ define i64 @vreduce_smin_v4i64(<4 x i64>* %x) {
 ; RV32-NEXT:    vlse64.v v25, (a0), zero
 ; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v26, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -3637,7 +3495,6 @@ define i64 @vreduce_smin_v4i64(<4 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v26, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <4 x i64>, <4 x i64>* %x
@@ -3664,7 +3521,6 @@ define i64 @vreduce_smin_v8i64(<8 x i64>* %x) {
 ; RV32-NEXT:    vlse64.v v25, (a0), zero
 ; RV32-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v28, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -3683,7 +3539,6 @@ define i64 @vreduce_smin_v8i64(<8 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v28, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <8 x i64>, <8 x i64>* %x
@@ -3710,7 +3565,6 @@ define i64 @vreduce_smin_v16i64(<16 x i64>* %x) {
 ; RV32-NEXT:    vlse64.v v25, (a0), zero
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -3729,7 +3583,6 @@ define i64 @vreduce_smin_v16i64(<16 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <16 x i64>, <16 x i64>* %x
@@ -3759,7 +3612,6 @@ define i64 @vreduce_smin_v32i64(<32 x i64>* %x) {
 ; RV32-NEXT:    vlse64.v v25, (a0), zero
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -3781,7 +3633,6 @@ define i64 @vreduce_smin_v32i64(<32 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <32 x i64>, <32 x i64>* %x
@@ -3816,7 +3667,6 @@ define i64 @vreduce_smin_v64i64(<64 x i64>* %x) nounwind {
 ; RV32-NEXT:    vlse64.v v25, (a0), zero
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredmin.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -3844,7 +3694,6 @@ define i64 @vreduce_smin_v64i64(<64 x i64>* %x) nounwind {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredmin.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <64 x i64>, <64 x i64>* %x
@@ -3878,7 +3727,6 @@ define i8 @vreduce_smax_v2i8(<2 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v26, a0
 ; CHECK-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i8>, <2 x i8>* %x
@@ -3898,7 +3746,6 @@ define i8 @vreduce_smax_v4i8(<4 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v26, a0
 ; CHECK-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i8>, <4 x i8>* %x
@@ -3918,7 +3765,6 @@ define i8 @vreduce_smax_v8i8(<8 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v26, a0
 ; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i8>, <8 x i8>* %x
@@ -3958,7 +3804,6 @@ define i8 @vreduce_smax_v32i8(<32 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i8>, <32 x i8>* %x
@@ -3979,7 +3824,6 @@ define i8 @vreduce_smax_v64i8(<64 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m4,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i8>, <64 x i8>* %x
@@ -4000,7 +3844,6 @@ define i8 @vreduce_smax_v128i8(<128 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i8>, <128 x i8>* %x
@@ -4024,7 +3867,6 @@ define i8 @vreduce_smax_v256i8(<256 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <256 x i8>, <256 x i8>* %x
@@ -4058,7 +3900,6 @@ define i16 @vreduce_smax_v2i16(<2 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.x v26, a0
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i16>, <2 x i16>* %x
@@ -4078,7 +3919,6 @@ define i16 @vreduce_smax_v4i16(<4 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.x v26, a0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i16>, <4 x i16>* %x
@@ -4117,7 +3957,6 @@ define i16 @vreduce_smax_v16i16(<16 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i16>, <16 x i16>* %x
@@ -4138,7 +3977,6 @@ define i16 @vreduce_smax_v32i16(<32 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i16>, <32 x i16>* %x
@@ -4159,7 +3997,6 @@ define i16 @vreduce_smax_v64i16(<64 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i16>, <64 x i16>* %x
@@ -4183,7 +4020,6 @@ define i16 @vreduce_smax_v128i16(<128 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i16>, <128 x i16>* %x
@@ -4217,7 +4053,6 @@ define i32 @vreduce_smax_v2i32(<2 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.x v26, a0
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i32>, <2 x i32>* %x
@@ -4256,7 +4091,6 @@ define i32 @vreduce_smax_v8i32(<8 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i32>, <8 x i32>* %x
@@ -4276,7 +4110,6 @@ define i32 @vreduce_smax_v16i32(<16 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i32>, <16 x i32>* %x
@@ -4297,7 +4130,6 @@ define i32 @vreduce_smax_v32i32(<32 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i32>, <32 x i32>* %x
@@ -4321,7 +4153,6 @@ define i32 @vreduce_smax_v64i32(<64 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i32>, <64 x i32>* %x
@@ -4412,7 +4243,6 @@ define i64 @vreduce_smax_v4i64(<4 x i64>* %x) {
 ; RV32-NEXT:    vlse64.v v25, (a0), zero
 ; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vredmax.vs v25, v26, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -4431,7 +4261,6 @@ define i64 @vreduce_smax_v4i64(<4 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vredmax.vs v25, v26, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <4 x i64>, <4 x i64>* %x
@@ -4456,7 +4285,6 @@ define i64 @vreduce_smax_v8i64(<8 x i64>* %x) {
 ; RV32-NEXT:    vlse64.v v25, (a0), zero
 ; RV32-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vredmax.vs v25, v28, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -4475,7 +4303,6 @@ define i64 @vreduce_smax_v8i64(<8 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vredmax.vs v25, v28, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <8 x i64>, <8 x i64>* %x
@@ -4500,7 +4327,6 @@ define i64 @vreduce_smax_v16i64(<16 x i64>* %x) {
 ; RV32-NEXT:    vlse64.v v25, (a0), zero
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredmax.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -4519,7 +4345,6 @@ define i64 @vreduce_smax_v16i64(<16 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredmax.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <16 x i64>, <16 x i64>* %x
@@ -4547,7 +4372,6 @@ define i64 @vreduce_smax_v32i64(<32 x i64>* %x) {
 ; RV32-NEXT:    vlse64.v v25, (a0), zero
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredmax.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -4569,7 +4393,6 @@ define i64 @vreduce_smax_v32i64(<32 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredmax.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <32 x i64>, <32 x i64>* %x
@@ -4602,7 +4425,6 @@ define i64 @vreduce_smax_v64i64(<64 x i64>* %x) nounwind {
 ; RV32-NEXT:    vlse64.v v25, (a0), zero
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredmax.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -4630,7 +4452,6 @@ define i64 @vreduce_smax_v64i64(<64 x i64>* %x) nounwind {
 ; RV64-NEXT:    vmv.v.x v25, a0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredmax.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <64 x i64>, <64 x i64>* %x
@@ -4663,7 +4484,6 @@ define i8 @vreduce_umin_v2i8(<2 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, -1
 ; CHECK-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i8>, <2 x i8>* %x
@@ -4682,7 +4502,6 @@ define i8 @vreduce_umin_v4i8(<4 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, -1
 ; CHECK-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i8>, <4 x i8>* %x
@@ -4701,7 +4520,6 @@ define i8 @vreduce_umin_v8i8(<8 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, -1
 ; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i8>, <8 x i8>* %x
@@ -4739,7 +4557,6 @@ define i8 @vreduce_umin_v32i8(<32 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i8>, <32 x i8>* %x
@@ -4759,7 +4576,6 @@ define i8 @vreduce_umin_v64i8(<64 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m4,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i8>, <64 x i8>* %x
@@ -4779,7 +4595,6 @@ define i8 @vreduce_umin_v128i8(<128 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i8>, <128 x i8>* %x
@@ -4802,7 +4617,6 @@ define i8 @vreduce_umin_v256i8(<256 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <256 x i8>, <256 x i8>* %x
@@ -4835,7 +4649,6 @@ define i16 @vreduce_umin_v2i16(<2 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, -1
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i16>, <2 x i16>* %x
@@ -4854,7 +4667,6 @@ define i16 @vreduce_umin_v4i16(<4 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, -1
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i16>, <4 x i16>* %x
@@ -4891,7 +4703,6 @@ define i16 @vreduce_umin_v16i16(<16 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i16>, <16 x i16>* %x
@@ -4911,7 +4722,6 @@ define i16 @vreduce_umin_v32i16(<32 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i16>, <32 x i16>* %x
@@ -4931,7 +4741,6 @@ define i16 @vreduce_umin_v64i16(<64 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i16>, <64 x i16>* %x
@@ -4954,7 +4763,6 @@ define i16 @vreduce_umin_v128i16(<128 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i16>, <128 x i16>* %x
@@ -4987,7 +4795,6 @@ define i32 @vreduce_umin_v2i32(<2 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, -1
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i32>, <2 x i32>* %x
@@ -5024,7 +4831,6 @@ define i32 @vreduce_umin_v8i32(<8 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i32>, <8 x i32>* %x
@@ -5043,7 +4849,6 @@ define i32 @vreduce_umin_v16i32(<16 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i32>, <16 x i32>* %x
@@ -5063,7 +4868,6 @@ define i32 @vreduce_umin_v32i32(<32 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i32>, <32 x i32>* %x
@@ -5086,7 +4890,6 @@ define i32 @vreduce_umin_v64i32(<64 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i32>, <64 x i32>* %x
@@ -5162,7 +4965,6 @@ define i64 @vreduce_umin_v4i64(<4 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, -1
 ; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vredminu.vs v25, v26, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -5178,7 +4980,6 @@ define i64 @vreduce_umin_v4i64(<4 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, -1
 ; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vredminu.vs v25, v26, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <4 x i64>, <4 x i64>* %x
@@ -5197,7 +4998,6 @@ define i64 @vreduce_umin_v8i64(<8 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, -1
 ; RV32-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vredminu.vs v25, v28, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -5213,7 +5013,6 @@ define i64 @vreduce_umin_v8i64(<8 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, -1
 ; RV64-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vredminu.vs v25, v28, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <8 x i64>, <8 x i64>* %x
@@ -5232,7 +5031,6 @@ define i64 @vreduce_umin_v16i64(<16 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, -1
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredminu.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -5248,7 +5046,6 @@ define i64 @vreduce_umin_v16i64(<16 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, -1
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredminu.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <16 x i64>, <16 x i64>* %x
@@ -5270,7 +5067,6 @@ define i64 @vreduce_umin_v32i64(<32 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, -1
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredminu.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -5289,7 +5085,6 @@ define i64 @vreduce_umin_v32i64(<32 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, -1
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredminu.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <32 x i64>, <32 x i64>* %x
@@ -5317,7 +5112,6 @@ define i64 @vreduce_umin_v64i64(<64 x i64>* %x) nounwind {
 ; RV32-NEXT:    vmv.v.i v25, -1
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredminu.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -5342,7 +5136,6 @@ define i64 @vreduce_umin_v64i64(<64 x i64>* %x) nounwind {
 ; RV64-NEXT:    vmv.v.i v25, -1
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredminu.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <64 x i64>, <64 x i64>* %x
@@ -5375,7 +5168,6 @@ define i8 @vreduce_umax_v2i8(<2 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i8>, <2 x i8>* %x
@@ -5394,7 +5186,6 @@ define i8 @vreduce_umax_v4i8(<4 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i8>, <4 x i8>* %x
@@ -5413,7 +5204,6 @@ define i8 @vreduce_umax_v8i8(<8 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i8>, <8 x i8>* %x
@@ -5451,7 +5241,6 @@ define i8 @vreduce_umax_v32i8(<32 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i8>, <32 x i8>* %x
@@ -5471,7 +5260,6 @@ define i8 @vreduce_umax_v64i8(<64 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m4,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i8>, <64 x i8>* %x
@@ -5491,7 +5279,6 @@ define i8 @vreduce_umax_v128i8(<128 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i8>, <128 x i8>* %x
@@ -5514,7 +5301,6 @@ define i8 @vreduce_umax_v256i8(<256 x i8>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e8,m8,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <256 x i8>, <256 x i8>* %x
@@ -5547,7 +5333,6 @@ define i16 @vreduce_umax_v2i16(<2 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i16>, <2 x i16>* %x
@@ -5566,7 +5351,6 @@ define i16 @vreduce_umax_v4i16(<4 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <4 x i16>, <4 x i16>* %x
@@ -5603,7 +5387,6 @@ define i16 @vreduce_umax_v16i16(<16 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i16>, <16 x i16>* %x
@@ -5623,7 +5406,6 @@ define i16 @vreduce_umax_v32i16(<32 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m4,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i16>, <32 x i16>* %x
@@ -5643,7 +5425,6 @@ define i16 @vreduce_umax_v64i16(<64 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i16>, <64 x i16>* %x
@@ -5666,7 +5447,6 @@ define i16 @vreduce_umax_v128i16(<128 x i16>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <128 x i16>, <128 x i16>* %x
@@ -5699,7 +5479,6 @@ define i32 @vreduce_umax_v2i32(<2 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v26, 0
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v25, v26
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <2 x i32>, <2 x i32>* %x
@@ -5736,7 +5515,6 @@ define i32 @vreduce_umax_v8i32(<8 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v26, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <8 x i32>, <8 x i32>* %x
@@ -5755,7 +5533,6 @@ define i32 @vreduce_umax_v16i32(<16 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v28, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <16 x i32>, <16 x i32>* %x
@@ -5775,7 +5552,6 @@ define i32 @vreduce_umax_v32i32(<32 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <32 x i32>, <32 x i32>* %x
@@ -5798,7 +5574,6 @@ define i32 @vreduce_umax_v64i32(<64 x i32>* %x) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %v = load <64 x i32>, <64 x i32>* %x
@@ -5874,7 +5649,6 @@ define i64 @vreduce_umax_v4i64(<4 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vredmaxu.vs v25, v26, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -5890,7 +5664,6 @@ define i64 @vreduce_umax_v4i64(<4 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vredmaxu.vs v25, v26, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <4 x i64>, <4 x i64>* %x
@@ -5909,7 +5682,6 @@ define i64 @vreduce_umax_v8i64(<8 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vredmaxu.vs v25, v28, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -5925,7 +5697,6 @@ define i64 @vreduce_umax_v8i64(<8 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vredmaxu.vs v25, v28, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <8 x i64>, <8 x i64>* %x
@@ -5944,7 +5715,6 @@ define i64 @vreduce_umax_v16i64(<16 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredmaxu.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -5960,7 +5730,6 @@ define i64 @vreduce_umax_v16i64(<16 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredmaxu.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <16 x i64>, <16 x i64>* %x
@@ -5982,7 +5751,6 @@ define i64 @vreduce_umax_v32i64(<32 x i64>* %x) {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredmaxu.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -6001,7 +5769,6 @@ define i64 @vreduce_umax_v32i64(<32 x i64>* %x) {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredmaxu.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <32 x i64>, <32 x i64>* %x
@@ -6029,7 +5796,6 @@ define i64 @vreduce_umax_v64i64(<64 x i64>* %x) nounwind {
 ; RV32-NEXT:    vmv.v.i v25, 0
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vredmaxu.vs v25, v8, v25
-; RV32-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV32-NEXT:    vmv.x.s a0, v25
 ; RV32-NEXT:    addi a1, zero, 32
 ; RV32-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -6054,7 +5820,6 @@ define i64 @vreduce_umax_v64i64(<64 x i64>* %x) nounwind {
 ; RV64-NEXT:    vmv.v.i v25, 0
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vredmaxu.vs v25, v8, v25
-; RV64-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.x.s a0, v25
 ; RV64-NEXT:    ret
   %v = load <64 x i64>, <64 x i64>* %x

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
index df93b907c41c3..e494151ce6936 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
@@ -7,20 +7,22 @@
 define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) {
 ; CHECK-LABEL: select_v2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vfmv.f.s ft1, v9
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    vfmv.f.s ft3, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:    vfmv.f.s ft2, v25
 ; CHECK-NEXT:    bnez a0, .LBB0_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fmv.h ft0, ft1
-; CHECK-NEXT:    fmv.h ft2, ft3
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v9
+; CHECK-NEXT:    vsetivli a0, 1, e16,mf4,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    j .LBB0_3
 ; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    vsetivli a0, 1, e16,mf4,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v8, 1
+; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    vfmv.f.s ft1, v25
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
-; CHECK-NEXT:    vfmv.v.f v8, ft2
+; CHECK-NEXT:    vfmv.v.f v8, ft1
 ; CHECK-NEXT:    vfmv.s.f v8, ft0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <2 x half> %a, <2 x half> %b
@@ -31,23 +33,27 @@ define <2 x half> @selectcc_v2f16(half %a, half %b, <2 x half> %c, <2 x half> %d
 ; CHECK-LABEL: selectcc_v2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    feq.h a0, fa0, fa1
+; CHECK-NEXT:    bnez a0, .LBB1_2
+; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,mf4,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    vfmv.f.s ft1, v25
+; CHECK-NEXT:    j .LBB1_3
+; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    vsetivli a1, 1, e16,mf4,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 1
+; CHECK-NEXT:  .LBB1_3:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    bnez a0, .LBB1_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fmv.h ft0, ft1
-; CHECK-NEXT:  .LBB1_2:
 ; CHECK-NEXT:    vsetivli a1, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
-; CHECK-NEXT:    vfmv.f.s ft1, v9
+; CHECK-NEXT:    bnez a0, .LBB1_5
+; CHECK-NEXT:  # %bb.4:
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v9
+; CHECK-NEXT:    j .LBB1_6
+; CHECK-NEXT:  .LBB1_5:
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    bnez a0, .LBB1_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    fmv.h ft0, ft1
-; CHECK-NEXT:  .LBB1_4:
+; CHECK-NEXT:  .LBB1_6:
 ; CHECK-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfmv.s.f v25, ft0
 ; CHECK-NEXT:    vmv1r.v v8, v25
@@ -62,44 +68,49 @@ define <4 x half> @select_v4f16(i1 zeroext %c, <4 x half> %a, <4 x half> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vfmv.f.s ft1, v9
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    bnez a0, .LBB2_2
+; CHECK-NEXT:    bnez a0, .LBB2_3
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fmv.h ft0, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v9
+; CHECK-NEXT:    fsh ft0, 8(sp)
+; CHECK-NEXT:    beqz a0, .LBB2_4
 ; CHECK-NEXT:  .LBB2_2:
+; CHECK-NEXT:    vsetivli a1, 1, e16,mf2,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v8, 3
+; CHECK-NEXT:    j .LBB2_5
+; CHECK-NEXT:  .LBB2_3:
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v8
 ; CHECK-NEXT:    fsh ft0, 8(sp)
+; CHECK-NEXT:    bnez a0, .LBB2_2
+; CHECK-NEXT:  .LBB2_4:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,mf2,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 3
+; CHECK-NEXT:  .LBB2_5:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB2_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB2_4:
-; CHECK-NEXT:    fsh ft1, 14(sp)
+; CHECK-NEXT:    fsh ft0, 14(sp)
+; CHECK-NEXT:    bnez a0, .LBB2_7
+; CHECK-NEXT:  # %bb.6:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,mf2,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 2
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 2
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB2_6
-; CHECK-NEXT:  # %bb.5:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB2_6:
-; CHECK-NEXT:    fsh ft1, 12(sp)
+; CHECK-NEXT:    j .LBB2_8
+; CHECK-NEXT:  .LBB2_7:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,mf2,ta,mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    vslidedown.vi v25, v8, 2
+; CHECK-NEXT:  .LBB2_8:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 12(sp)
+; CHECK-NEXT:    bnez a0, .LBB2_10
+; CHECK-NEXT:  # %bb.9:
+; CHECK-NEXT:    vsetivli a0, 1, e16,mf2,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    j .LBB2_11
+; CHECK-NEXT:  .LBB2_10:
+; CHECK-NEXT:    vsetivli a0, 1, e16,mf2,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB2_8
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB2_8:
-; CHECK-NEXT:    fsh ft1, 10(sp)
+; CHECK-NEXT:  .LBB2_11:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 10(sp)
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vle16.v v8, (a0)
@@ -115,44 +126,49 @@ define <4 x half> @selectcc_v4f16(half %a, half %b, <4 x half> %c, <4 x half> %d
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    feq.h a0, fa0, fa1
-; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vfmv.f.s ft1, v9
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    bnez a0, .LBB3_2
+; CHECK-NEXT:    bnez a0, .LBB3_3
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fmv.h ft0, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v9
+; CHECK-NEXT:    fsh ft0, 8(sp)
+; CHECK-NEXT:    beqz a0, .LBB3_4
 ; CHECK-NEXT:  .LBB3_2:
+; CHECK-NEXT:    vsetivli a1, 1, e16,mf2,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v8, 3
+; CHECK-NEXT:    j .LBB3_5
+; CHECK-NEXT:  .LBB3_3:
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v8
 ; CHECK-NEXT:    fsh ft0, 8(sp)
+; CHECK-NEXT:    bnez a0, .LBB3_2
+; CHECK-NEXT:  .LBB3_4:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,mf2,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 3
+; CHECK-NEXT:  .LBB3_5:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB3_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB3_4:
-; CHECK-NEXT:    fsh ft1, 14(sp)
+; CHECK-NEXT:    fsh ft0, 14(sp)
+; CHECK-NEXT:    bnez a0, .LBB3_7
+; CHECK-NEXT:  # %bb.6:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,mf2,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 2
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 2
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB3_6
-; CHECK-NEXT:  # %bb.5:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB3_6:
-; CHECK-NEXT:    fsh ft1, 12(sp)
+; CHECK-NEXT:    j .LBB3_8
+; CHECK-NEXT:  .LBB3_7:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,mf2,ta,mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    vslidedown.vi v25, v8, 2
+; CHECK-NEXT:  .LBB3_8:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 12(sp)
+; CHECK-NEXT:    bnez a0, .LBB3_10
+; CHECK-NEXT:  # %bb.9:
+; CHECK-NEXT:    vsetivli a0, 1, e16,mf2,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    j .LBB3_11
+; CHECK-NEXT:  .LBB3_10:
+; CHECK-NEXT:    vsetivli a0, 1, e16,mf2,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB3_8
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB3_8:
-; CHECK-NEXT:    fsh ft1, 10(sp)
+; CHECK-NEXT:  .LBB3_11:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 10(sp)
 ; CHECK-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vle16.v v8, (a0)
@@ -168,84 +184,93 @@ define <8 x half> @select_v8f16(i1 zeroext %c, <8 x half> %a, <8 x half> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
-; CHECK-NEXT:    vfmv.f.s ft1, v9
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    bnez a0, .LBB4_2
+; CHECK-NEXT:    bnez a0, .LBB4_3
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fmv.h ft0, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v9
+; CHECK-NEXT:    fsh ft0, 0(sp)
+; CHECK-NEXT:    beqz a0, .LBB4_4
 ; CHECK-NEXT:  .LBB4_2:
+; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v8, 7
+; CHECK-NEXT:    j .LBB4_5
+; CHECK-NEXT:  .LBB4_3:
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v8
 ; CHECK-NEXT:    fsh ft0, 0(sp)
+; CHECK-NEXT:    bnez a0, .LBB4_2
+; CHECK-NEXT:  .LBB4_4:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 7
+; CHECK-NEXT:  .LBB4_5:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 7
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB4_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB4_4:
-; CHECK-NEXT:    fsh ft1, 14(sp)
+; CHECK-NEXT:    fsh ft0, 14(sp)
+; CHECK-NEXT:    bnez a0, .LBB4_7
+; CHECK-NEXT:  # %bb.6:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 6
-; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    j .LBB4_8
+; CHECK-NEXT:  .LBB4_7:
+; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 6
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB4_6
-; CHECK-NEXT:  # %bb.5:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB4_6:
-; CHECK-NEXT:    fsh ft1, 12(sp)
+; CHECK-NEXT:  .LBB4_8:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 12(sp)
+; CHECK-NEXT:    bnez a0, .LBB4_10
+; CHECK-NEXT:  # %bb.9:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 5
-; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    j .LBB4_11
+; CHECK-NEXT:  .LBB4_10:
+; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 5
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB4_8
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB4_8:
-; CHECK-NEXT:    fsh ft1, 10(sp)
+; CHECK-NEXT:  .LBB4_11:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 10(sp)
+; CHECK-NEXT:    bnez a0, .LBB4_13
+; CHECK-NEXT:  # %bb.12:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 4
-; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    j .LBB4_14
+; CHECK-NEXT:  .LBB4_13:
+; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 4
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB4_10
-; CHECK-NEXT:  # %bb.9:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB4_10:
-; CHECK-NEXT:    fsh ft1, 8(sp)
+; CHECK-NEXT:  .LBB4_14:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 8(sp)
+; CHECK-NEXT:    bnez a0, .LBB4_16
+; CHECK-NEXT:  # %bb.15:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 3
-; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    j .LBB4_17
+; CHECK-NEXT:  .LBB4_16:
+; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB4_12
-; CHECK-NEXT:  # %bb.11:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB4_12:
-; CHECK-NEXT:    fsh ft1, 6(sp)
+; CHECK-NEXT:  .LBB4_17:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 6(sp)
+; CHECK-NEXT:    bnez a0, .LBB4_19
+; CHECK-NEXT:  # %bb.18:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 2
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 2
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB4_14
-; CHECK-NEXT:  # %bb.13:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB4_14:
-; CHECK-NEXT:    fsh ft1, 4(sp)
+; CHECK-NEXT:    j .LBB4_20
+; CHECK-NEXT:  .LBB4_19:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    vslidedown.vi v25, v8, 2
+; CHECK-NEXT:  .LBB4_20:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 4(sp)
+; CHECK-NEXT:    bnez a0, .LBB4_22
+; CHECK-NEXT:  # %bb.21:
+; CHECK-NEXT:    vsetivli a0, 1, e16,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    j .LBB4_23
+; CHECK-NEXT:  .LBB4_22:
+; CHECK-NEXT:    vsetivli a0, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB4_16
-; CHECK-NEXT:  # %bb.15:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB4_16:
-; CHECK-NEXT:    fsh ft1, 2(sp)
+; CHECK-NEXT:  .LBB4_23:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 2(sp)
 ; CHECK-NEXT:    vsetivli a0, 8, e16,m1,ta,mu
 ; CHECK-NEXT:    vle16.v v8, (sp)
 ; CHECK-NEXT:    addi sp, sp, 16
@@ -260,84 +285,93 @@ define <8 x half> @selectcc_v8f16(half %a, half %b, <8 x half> %c, <8 x half> %d
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    feq.h a0, fa0, fa1
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
-; CHECK-NEXT:    vfmv.f.s ft1, v9
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    bnez a0, .LBB5_2
+; CHECK-NEXT:    bnez a0, .LBB5_3
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fmv.h ft0, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v9
+; CHECK-NEXT:    fsh ft0, 0(sp)
+; CHECK-NEXT:    beqz a0, .LBB5_4
 ; CHECK-NEXT:  .LBB5_2:
+; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v8, 7
+; CHECK-NEXT:    j .LBB5_5
+; CHECK-NEXT:  .LBB5_3:
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v8
 ; CHECK-NEXT:    fsh ft0, 0(sp)
+; CHECK-NEXT:    bnez a0, .LBB5_2
+; CHECK-NEXT:  .LBB5_4:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 7
+; CHECK-NEXT:  .LBB5_5:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 7
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB5_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB5_4:
-; CHECK-NEXT:    fsh ft1, 14(sp)
+; CHECK-NEXT:    fsh ft0, 14(sp)
+; CHECK-NEXT:    bnez a0, .LBB5_7
+; CHECK-NEXT:  # %bb.6:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 6
-; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    j .LBB5_8
+; CHECK-NEXT:  .LBB5_7:
+; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 6
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB5_6
-; CHECK-NEXT:  # %bb.5:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB5_6:
-; CHECK-NEXT:    fsh ft1, 12(sp)
+; CHECK-NEXT:  .LBB5_8:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 12(sp)
+; CHECK-NEXT:    bnez a0, .LBB5_10
+; CHECK-NEXT:  # %bb.9:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 5
-; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    j .LBB5_11
+; CHECK-NEXT:  .LBB5_10:
+; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 5
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB5_8
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB5_8:
-; CHECK-NEXT:    fsh ft1, 10(sp)
+; CHECK-NEXT:  .LBB5_11:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 10(sp)
+; CHECK-NEXT:    bnez a0, .LBB5_13
+; CHECK-NEXT:  # %bb.12:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 4
-; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    j .LBB5_14
+; CHECK-NEXT:  .LBB5_13:
+; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 4
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB5_10
-; CHECK-NEXT:  # %bb.9:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB5_10:
-; CHECK-NEXT:    fsh ft1, 8(sp)
+; CHECK-NEXT:  .LBB5_14:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 8(sp)
+; CHECK-NEXT:    bnez a0, .LBB5_16
+; CHECK-NEXT:  # %bb.15:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 3
-; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    j .LBB5_17
+; CHECK-NEXT:  .LBB5_16:
+; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB5_12
-; CHECK-NEXT:  # %bb.11:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB5_12:
-; CHECK-NEXT:    fsh ft1, 6(sp)
+; CHECK-NEXT:  .LBB5_17:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 6(sp)
+; CHECK-NEXT:    bnez a0, .LBB5_19
+; CHECK-NEXT:  # %bb.18:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 2
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 2
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB5_14
-; CHECK-NEXT:  # %bb.13:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB5_14:
-; CHECK-NEXT:    fsh ft1, 4(sp)
+; CHECK-NEXT:    j .LBB5_20
+; CHECK-NEXT:  .LBB5_19:
 ; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    vslidedown.vi v25, v8, 2
+; CHECK-NEXT:  .LBB5_20:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 4(sp)
+; CHECK-NEXT:    bnez a0, .LBB5_22
+; CHECK-NEXT:  # %bb.21:
+; CHECK-NEXT:    vsetivli a0, 1, e16,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    j .LBB5_23
+; CHECK-NEXT:  .LBB5_22:
+; CHECK-NEXT:    vsetivli a0, 1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB5_16
-; CHECK-NEXT:  # %bb.15:
-; CHECK-NEXT:    fmv.h ft1, ft0
-; CHECK-NEXT:  .LBB5_16:
-; CHECK-NEXT:    fsh ft1, 2(sp)
+; CHECK-NEXT:  .LBB5_23:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsh ft0, 2(sp)
 ; CHECK-NEXT:    vsetivli a0, 8, e16,m1,ta,mu
 ; CHECK-NEXT:    vle16.v v8, (sp)
 ; CHECK-NEXT:    addi sp, sp, 16
@@ -359,165 +393,182 @@ define <16 x half> @select_v16f16(i1 zeroext %c, <16 x half> %a, <16 x half> %b)
 ; RV32-NEXT:    addi s0, sp, 64
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    andi sp, sp, -32
-; RV32-NEXT:    vsetvli zero, zero, e16,m2,ta,mu
-; RV32-NEXT:    vfmv.f.s ft1, v10
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    bnez a0, .LBB6_2
+; RV32-NEXT:    bnez a0, .LBB6_3
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fmv.h ft0, ft1
+; RV32-NEXT:    vsetvli zero, zero, e16,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v10
+; RV32-NEXT:    fsh ft0, 0(sp)
+; RV32-NEXT:    beqz a0, .LBB6_4
 ; RV32-NEXT:  .LBB6_2:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v8, 15
+; RV32-NEXT:    j .LBB6_5
+; RV32-NEXT:  .LBB6_3:
+; RV32-NEXT:    vsetvli zero, zero, e16,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fsh ft0, 0(sp)
+; RV32-NEXT:    bnez a0, .LBB6_2
+; RV32-NEXT:  .LBB6_4:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 15
+; RV32-NEXT:  .LBB6_5:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 15
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB6_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB6_4:
-; RV32-NEXT:    fsh ft1, 30(sp)
+; RV32-NEXT:    fsh ft0, 30(sp)
+; RV32-NEXT:    bnez a0, .LBB6_7
+; RV32-NEXT:  # %bb.6:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 14
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 14
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB6_6
-; RV32-NEXT:  # %bb.5:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB6_6:
-; RV32-NEXT:    fsh ft1, 28(sp)
+; RV32-NEXT:    j .LBB6_8
+; RV32-NEXT:  .LBB6_7:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 13
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 13
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB6_8
-; RV32-NEXT:  # %bb.7:
-; RV32-NEXT:    fmv.h ft1, ft0
+; RV32-NEXT:    vslidedown.vi v26, v8, 14
 ; RV32-NEXT:  .LBB6_8:
-; RV32-NEXT:    fsh ft1, 26(sp)
-; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 12
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 12
-; RV32-NEXT:    vfmv.f.s ft1, v26
+; RV32-NEXT:    fsh ft0, 28(sp)
 ; RV32-NEXT:    bnez a0, .LBB6_10
 ; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    fmv.h ft1, ft0
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 13
+; RV32-NEXT:    j .LBB6_11
 ; RV32-NEXT:  .LBB6_10:
-; RV32-NEXT:    fsh ft1, 24(sp)
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 11
+; RV32-NEXT:    vslidedown.vi v26, v8, 13
+; RV32-NEXT:  .LBB6_11:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 11
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB6_12
-; RV32-NEXT:  # %bb.11:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB6_12:
-; RV32-NEXT:    fsh ft1, 22(sp)
+; RV32-NEXT:    fsh ft0, 26(sp)
+; RV32-NEXT:    bnez a0, .LBB6_13
+; RV32-NEXT:  # %bb.12:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 10
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 10
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB6_14
-; RV32-NEXT:  # %bb.13:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB6_14:
-; RV32-NEXT:    fsh ft1, 20(sp)
+; RV32-NEXT:    vslidedown.vi v26, v10, 12
+; RV32-NEXT:    j .LBB6_14
+; RV32-NEXT:  .LBB6_13:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 9
+; RV32-NEXT:    vslidedown.vi v26, v8, 12
+; RV32-NEXT:  .LBB6_14:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 9
-; RV32-NEXT:    vfmv.f.s ft1, v26
+; RV32-NEXT:    fsh ft0, 24(sp)
 ; RV32-NEXT:    bnez a0, .LBB6_16
 ; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    fmv.h ft1, ft0
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 11
+; RV32-NEXT:    j .LBB6_17
 ; RV32-NEXT:  .LBB6_16:
-; RV32-NEXT:    fsh ft1, 18(sp)
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 8
+; RV32-NEXT:    vslidedown.vi v26, v8, 11
+; RV32-NEXT:  .LBB6_17:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 8
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB6_18
-; RV32-NEXT:  # %bb.17:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB6_18:
-; RV32-NEXT:    fsh ft1, 16(sp)
+; RV32-NEXT:    fsh ft0, 22(sp)
+; RV32-NEXT:    bnez a0, .LBB6_19
+; RV32-NEXT:  # %bb.18:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 7
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 7
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB6_20
-; RV32-NEXT:  # %bb.19:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB6_20:
-; RV32-NEXT:    fsh ft1, 14(sp)
+; RV32-NEXT:    vslidedown.vi v26, v10, 10
+; RV32-NEXT:    j .LBB6_20
+; RV32-NEXT:  .LBB6_19:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 6
+; RV32-NEXT:    vslidedown.vi v26, v8, 10
+; RV32-NEXT:  .LBB6_20:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 6
-; RV32-NEXT:    vfmv.f.s ft1, v26
+; RV32-NEXT:    fsh ft0, 20(sp)
 ; RV32-NEXT:    bnez a0, .LBB6_22
 ; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    fmv.h ft1, ft0
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 9
+; RV32-NEXT:    j .LBB6_23
 ; RV32-NEXT:  .LBB6_22:
-; RV32-NEXT:    fsh ft1, 12(sp)
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 5
+; RV32-NEXT:    vslidedown.vi v26, v8, 9
+; RV32-NEXT:  .LBB6_23:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 5
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB6_24
-; RV32-NEXT:  # %bb.23:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB6_24:
-; RV32-NEXT:    fsh ft1, 10(sp)
+; RV32-NEXT:    fsh ft0, 18(sp)
+; RV32-NEXT:    bnez a0, .LBB6_25
+; RV32-NEXT:  # %bb.24:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 4
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 4
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB6_26
-; RV32-NEXT:  # %bb.25:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB6_26:
-; RV32-NEXT:    fsh ft1, 8(sp)
+; RV32-NEXT:    vslidedown.vi v26, v10, 8
+; RV32-NEXT:    j .LBB6_26
+; RV32-NEXT:  .LBB6_25:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 3
+; RV32-NEXT:    vslidedown.vi v26, v8, 8
+; RV32-NEXT:  .LBB6_26:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 3
-; RV32-NEXT:    vfmv.f.s ft1, v26
+; RV32-NEXT:    fsh ft0, 16(sp)
 ; RV32-NEXT:    bnez a0, .LBB6_28
 ; RV32-NEXT:  # %bb.27:
-; RV32-NEXT:    fmv.h ft1, ft0
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 7
+; RV32-NEXT:    j .LBB6_29
 ; RV32-NEXT:  .LBB6_28:
-; RV32-NEXT:    fsh ft1, 6(sp)
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 2
+; RV32-NEXT:    vslidedown.vi v26, v8, 7
+; RV32-NEXT:  .LBB6_29:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 2
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB6_30
-; RV32-NEXT:  # %bb.29:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB6_30:
-; RV32-NEXT:    fsh ft1, 4(sp)
+; RV32-NEXT:    fsh ft0, 14(sp)
+; RV32-NEXT:    bnez a0, .LBB6_31
+; RV32-NEXT:  # %bb.30:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 1
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 1
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB6_32
-; RV32-NEXT:  # %bb.31:
-; RV32-NEXT:    fmv.h ft1, ft0
+; RV32-NEXT:    vslidedown.vi v26, v10, 6
+; RV32-NEXT:    j .LBB6_32
+; RV32-NEXT:  .LBB6_31:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v8, 6
 ; RV32-NEXT:  .LBB6_32:
-; RV32-NEXT:    fsh ft1, 2(sp)
-; RV32-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 12(sp)
+; RV32-NEXT:    bnez a0, .LBB6_34
+; RV32-NEXT:  # %bb.33:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 5
+; RV32-NEXT:    j .LBB6_35
+; RV32-NEXT:  .LBB6_34:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v8, 5
+; RV32-NEXT:  .LBB6_35:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 10(sp)
+; RV32-NEXT:    bnez a0, .LBB6_37
+; RV32-NEXT:  # %bb.36:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 4
+; RV32-NEXT:    j .LBB6_38
+; RV32-NEXT:  .LBB6_37:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v8, 4
+; RV32-NEXT:  .LBB6_38:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 8(sp)
+; RV32-NEXT:    bnez a0, .LBB6_40
+; RV32-NEXT:  # %bb.39:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 3
+; RV32-NEXT:    j .LBB6_41
+; RV32-NEXT:  .LBB6_40:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v8, 3
+; RV32-NEXT:  .LBB6_41:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 6(sp)
+; RV32-NEXT:    bnez a0, .LBB6_43
+; RV32-NEXT:  # %bb.42:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 2
+; RV32-NEXT:    j .LBB6_44
+; RV32-NEXT:  .LBB6_43:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v8, 2
+; RV32-NEXT:  .LBB6_44:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 4(sp)
+; RV32-NEXT:    bnez a0, .LBB6_46
+; RV32-NEXT:  # %bb.45:
+; RV32-NEXT:    vsetivli a0, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 1
+; RV32-NEXT:    j .LBB6_47
+; RV32-NEXT:  .LBB6_46:
+; RV32-NEXT:    vsetivli a0, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v8, 1
+; RV32-NEXT:  .LBB6_47:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 2(sp)
+; RV32-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; RV32-NEXT:    vle16.v v8, (sp)
 ; RV32-NEXT:    addi sp, s0, -64
 ; RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
@@ -536,164 +587,181 @@ define <16 x half> @select_v16f16(i1 zeroext %c, <16 x half> %a, <16 x half> %b)
 ; RV64-NEXT:    addi s0, sp, 64
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -32
-; RV64-NEXT:    vsetvli zero, zero, e16,m2,ta,mu
-; RV64-NEXT:    vfmv.f.s ft1, v10
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    bnez a0, .LBB6_2
+; RV64-NEXT:    bnez a0, .LBB6_3
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fmv.h ft0, ft1
+; RV64-NEXT:    vsetvli zero, zero, e16,m2,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v10
+; RV64-NEXT:    fsh ft0, 0(sp)
+; RV64-NEXT:    beqz a0, .LBB6_4
 ; RV64-NEXT:  .LBB6_2:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v8, 15
+; RV64-NEXT:    j .LBB6_5
+; RV64-NEXT:  .LBB6_3:
+; RV64-NEXT:    vsetvli zero, zero, e16,m2,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fsh ft0, 0(sp)
+; RV64-NEXT:    bnez a0, .LBB6_2
+; RV64-NEXT:  .LBB6_4:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 15
+; RV64-NEXT:  .LBB6_5:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 15
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_4
-; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_4:
-; RV64-NEXT:    fsh ft1, 30(sp)
+; RV64-NEXT:    fsh ft0, 30(sp)
+; RV64-NEXT:    bnez a0, .LBB6_7
+; RV64-NEXT:  # %bb.6:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 14
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB6_8
+; RV64-NEXT:  .LBB6_7:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 14
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_6
-; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_6:
-; RV64-NEXT:    fsh ft1, 28(sp)
+; RV64-NEXT:  .LBB6_8:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 28(sp)
+; RV64-NEXT:    bnez a0, .LBB6_10
+; RV64-NEXT:  # %bb.9:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 13
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB6_11
+; RV64-NEXT:  .LBB6_10:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 13
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_8
-; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_8:
-; RV64-NEXT:    fsh ft1, 26(sp)
+; RV64-NEXT:  .LBB6_11:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 26(sp)
+; RV64-NEXT:    bnez a0, .LBB6_13
+; RV64-NEXT:  # %bb.12:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 12
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB6_14
+; RV64-NEXT:  .LBB6_13:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 12
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_10:
-; RV64-NEXT:    fsh ft1, 24(sp)
+; RV64-NEXT:  .LBB6_14:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 24(sp)
+; RV64-NEXT:    bnez a0, .LBB6_16
+; RV64-NEXT:  # %bb.15:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 11
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB6_17
+; RV64-NEXT:  .LBB6_16:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 11
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_12
-; RV64-NEXT:  # %bb.11:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_12:
-; RV64-NEXT:    fsh ft1, 22(sp)
+; RV64-NEXT:  .LBB6_17:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 22(sp)
+; RV64-NEXT:    bnez a0, .LBB6_19
+; RV64-NEXT:  # %bb.18:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 10
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB6_20
+; RV64-NEXT:  .LBB6_19:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 10
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_14
-; RV64-NEXT:  # %bb.13:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_14:
-; RV64-NEXT:    fsh ft1, 20(sp)
+; RV64-NEXT:  .LBB6_20:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 20(sp)
+; RV64-NEXT:    bnez a0, .LBB6_22
+; RV64-NEXT:  # %bb.21:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 9
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB6_23
+; RV64-NEXT:  .LBB6_22:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 9
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_16:
-; RV64-NEXT:    fsh ft1, 18(sp)
+; RV64-NEXT:  .LBB6_23:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 18(sp)
+; RV64-NEXT:    bnez a0, .LBB6_25
+; RV64-NEXT:  # %bb.24:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 8
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB6_26
+; RV64-NEXT:  .LBB6_25:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 8
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_18
-; RV64-NEXT:  # %bb.17:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_18:
-; RV64-NEXT:    fsh ft1, 16(sp)
+; RV64-NEXT:  .LBB6_26:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 16(sp)
+; RV64-NEXT:    bnez a0, .LBB6_28
+; RV64-NEXT:  # %bb.27:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 7
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB6_29
+; RV64-NEXT:  .LBB6_28:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 7
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_20
-; RV64-NEXT:  # %bb.19:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_20:
-; RV64-NEXT:    fsh ft1, 14(sp)
+; RV64-NEXT:  .LBB6_29:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 14(sp)
+; RV64-NEXT:    bnez a0, .LBB6_31
+; RV64-NEXT:  # %bb.30:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 6
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB6_32
+; RV64-NEXT:  .LBB6_31:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 6
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_22:
-; RV64-NEXT:    fsh ft1, 12(sp)
+; RV64-NEXT:  .LBB6_32:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 12(sp)
+; RV64-NEXT:    bnez a0, .LBB6_34
+; RV64-NEXT:  # %bb.33:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 5
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB6_35
+; RV64-NEXT:  .LBB6_34:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 5
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_24
-; RV64-NEXT:  # %bb.23:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_24:
-; RV64-NEXT:    fsh ft1, 10(sp)
+; RV64-NEXT:  .LBB6_35:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 10(sp)
+; RV64-NEXT:    bnez a0, .LBB6_37
+; RV64-NEXT:  # %bb.36:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 4
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB6_38
+; RV64-NEXT:  .LBB6_37:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 4
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_26
-; RV64-NEXT:  # %bb.25:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_26:
-; RV64-NEXT:    fsh ft1, 8(sp)
+; RV64-NEXT:  .LBB6_38:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 8(sp)
+; RV64-NEXT:    bnez a0, .LBB6_40
+; RV64-NEXT:  # %bb.39:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 3
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB6_41
+; RV64-NEXT:  .LBB6_40:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 3
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_28
-; RV64-NEXT:  # %bb.27:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_28:
-; RV64-NEXT:    fsh ft1, 6(sp)
+; RV64-NEXT:  .LBB6_41:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 6(sp)
+; RV64-NEXT:    bnez a0, .LBB6_43
+; RV64-NEXT:  # %bb.42:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 2
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 2
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_30
-; RV64-NEXT:  # %bb.29:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_30:
-; RV64-NEXT:    fsh ft1, 4(sp)
+; RV64-NEXT:    j .LBB6_44
+; RV64-NEXT:  .LBB6_43:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 1
+; RV64-NEXT:    vslidedown.vi v26, v8, 2
+; RV64-NEXT:  .LBB6_44:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 4(sp)
+; RV64-NEXT:    bnez a0, .LBB6_46
+; RV64-NEXT:  # %bb.45:
+; RV64-NEXT:    vsetivli a0, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 1
+; RV64-NEXT:    j .LBB6_47
+; RV64-NEXT:  .LBB6_46:
+; RV64-NEXT:    vsetivli a0, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 1
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB6_32
-; RV64-NEXT:  # %bb.31:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB6_32:
-; RV64-NEXT:    fsh ft1, 2(sp)
+; RV64-NEXT:  .LBB6_47:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 2(sp)
 ; RV64-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; RV64-NEXT:    vle16.v v8, (sp)
 ; RV64-NEXT:    addi sp, s0, -64
@@ -718,164 +786,181 @@ define <16 x half> @selectcc_v16f16(half %a, half %b, <16 x half> %c, <16 x half
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    andi sp, sp, -32
 ; RV32-NEXT:    feq.h a0, fa0, fa1
-; RV32-NEXT:    vsetvli zero, zero, e16,m2,ta,mu
-; RV32-NEXT:    vfmv.f.s ft1, v10
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    bnez a0, .LBB7_2
+; RV32-NEXT:    bnez a0, .LBB7_3
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fmv.h ft0, ft1
+; RV32-NEXT:    vsetvli zero, zero, e16,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v10
+; RV32-NEXT:    fsh ft0, 0(sp)
+; RV32-NEXT:    beqz a0, .LBB7_4
 ; RV32-NEXT:  .LBB7_2:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v8, 15
+; RV32-NEXT:    j .LBB7_5
+; RV32-NEXT:  .LBB7_3:
+; RV32-NEXT:    vsetvli zero, zero, e16,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fsh ft0, 0(sp)
+; RV32-NEXT:    bnez a0, .LBB7_2
+; RV32-NEXT:  .LBB7_4:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 15
+; RV32-NEXT:  .LBB7_5:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 15
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_4:
-; RV32-NEXT:    fsh ft1, 30(sp)
+; RV32-NEXT:    fsh ft0, 30(sp)
+; RV32-NEXT:    bnez a0, .LBB7_7
+; RV32-NEXT:  # %bb.6:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 14
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB7_8
+; RV32-NEXT:  .LBB7_7:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 14
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_6
-; RV32-NEXT:  # %bb.5:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_6:
-; RV32-NEXT:    fsh ft1, 28(sp)
+; RV32-NEXT:  .LBB7_8:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 28(sp)
+; RV32-NEXT:    bnez a0, .LBB7_10
+; RV32-NEXT:  # %bb.9:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 13
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB7_11
+; RV32-NEXT:  .LBB7_10:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 13
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_8
-; RV32-NEXT:  # %bb.7:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_8:
-; RV32-NEXT:    fsh ft1, 26(sp)
+; RV32-NEXT:  .LBB7_11:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 26(sp)
+; RV32-NEXT:    bnez a0, .LBB7_13
+; RV32-NEXT:  # %bb.12:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 12
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB7_14
+; RV32-NEXT:  .LBB7_13:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 12
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_10:
-; RV32-NEXT:    fsh ft1, 24(sp)
+; RV32-NEXT:  .LBB7_14:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 24(sp)
+; RV32-NEXT:    bnez a0, .LBB7_16
+; RV32-NEXT:  # %bb.15:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 11
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB7_17
+; RV32-NEXT:  .LBB7_16:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 11
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_12
-; RV32-NEXT:  # %bb.11:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_12:
-; RV32-NEXT:    fsh ft1, 22(sp)
+; RV32-NEXT:  .LBB7_17:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 22(sp)
+; RV32-NEXT:    bnez a0, .LBB7_19
+; RV32-NEXT:  # %bb.18:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 10
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB7_20
+; RV32-NEXT:  .LBB7_19:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 10
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_14
-; RV32-NEXT:  # %bb.13:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_14:
-; RV32-NEXT:    fsh ft1, 20(sp)
+; RV32-NEXT:  .LBB7_20:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 20(sp)
+; RV32-NEXT:    bnez a0, .LBB7_22
+; RV32-NEXT:  # %bb.21:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 9
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB7_23
+; RV32-NEXT:  .LBB7_22:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 9
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_16:
-; RV32-NEXT:    fsh ft1, 18(sp)
+; RV32-NEXT:  .LBB7_23:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 18(sp)
+; RV32-NEXT:    bnez a0, .LBB7_25
+; RV32-NEXT:  # %bb.24:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 8
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB7_26
+; RV32-NEXT:  .LBB7_25:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 8
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_18
-; RV32-NEXT:  # %bb.17:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_18:
-; RV32-NEXT:    fsh ft1, 16(sp)
+; RV32-NEXT:  .LBB7_26:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 16(sp)
+; RV32-NEXT:    bnez a0, .LBB7_28
+; RV32-NEXT:  # %bb.27:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 7
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB7_29
+; RV32-NEXT:  .LBB7_28:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 7
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_20
-; RV32-NEXT:  # %bb.19:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_20:
-; RV32-NEXT:    fsh ft1, 14(sp)
+; RV32-NEXT:  .LBB7_29:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 14(sp)
+; RV32-NEXT:    bnez a0, .LBB7_31
+; RV32-NEXT:  # %bb.30:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 6
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB7_32
+; RV32-NEXT:  .LBB7_31:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 6
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_22:
-; RV32-NEXT:    fsh ft1, 12(sp)
+; RV32-NEXT:  .LBB7_32:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 12(sp)
+; RV32-NEXT:    bnez a0, .LBB7_34
+; RV32-NEXT:  # %bb.33:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 5
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB7_35
+; RV32-NEXT:  .LBB7_34:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 5
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_24
-; RV32-NEXT:  # %bb.23:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_24:
-; RV32-NEXT:    fsh ft1, 10(sp)
+; RV32-NEXT:  .LBB7_35:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 10(sp)
+; RV32-NEXT:    bnez a0, .LBB7_37
+; RV32-NEXT:  # %bb.36:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 4
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB7_38
+; RV32-NEXT:  .LBB7_37:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 4
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_26
-; RV32-NEXT:  # %bb.25:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_26:
-; RV32-NEXT:    fsh ft1, 8(sp)
+; RV32-NEXT:  .LBB7_38:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 8(sp)
+; RV32-NEXT:    bnez a0, .LBB7_40
+; RV32-NEXT:  # %bb.39:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 3
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB7_41
+; RV32-NEXT:  .LBB7_40:
+; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 3
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_28
-; RV32-NEXT:  # %bb.27:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_28:
-; RV32-NEXT:    fsh ft1, 6(sp)
+; RV32-NEXT:  .LBB7_41:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 6(sp)
+; RV32-NEXT:    bnez a0, .LBB7_43
+; RV32-NEXT:  # %bb.42:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 2
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 2
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_30
-; RV32-NEXT:  # %bb.29:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_30:
-; RV32-NEXT:    fsh ft1, 4(sp)
+; RV32-NEXT:    j .LBB7_44
+; RV32-NEXT:  .LBB7_43:
 ; RV32-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 1
+; RV32-NEXT:    vslidedown.vi v26, v8, 2
+; RV32-NEXT:  .LBB7_44:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 4(sp)
+; RV32-NEXT:    bnez a0, .LBB7_46
+; RV32-NEXT:  # %bb.45:
+; RV32-NEXT:    vsetivli a0, 1, e16,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 1
+; RV32-NEXT:    j .LBB7_47
+; RV32-NEXT:  .LBB7_46:
+; RV32-NEXT:    vsetivli a0, 1, e16,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 1
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB7_32
-; RV32-NEXT:  # %bb.31:
-; RV32-NEXT:    fmv.h ft1, ft0
-; RV32-NEXT:  .LBB7_32:
-; RV32-NEXT:    fsh ft1, 2(sp)
+; RV32-NEXT:  .LBB7_47:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsh ft0, 2(sp)
 ; RV32-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; RV32-NEXT:    vle16.v v8, (sp)
 ; RV32-NEXT:    addi sp, s0, -64
@@ -896,164 +981,181 @@ define <16 x half> @selectcc_v16f16(half %a, half %b, <16 x half> %c, <16 x half
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -32
 ; RV64-NEXT:    feq.h a0, fa0, fa1
-; RV64-NEXT:    vsetvli zero, zero, e16,m2,ta,mu
-; RV64-NEXT:    vfmv.f.s ft1, v10
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    bnez a0, .LBB7_2
+; RV64-NEXT:    bnez a0, .LBB7_3
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fmv.h ft0, ft1
+; RV64-NEXT:    vsetvli zero, zero, e16,m2,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v10
+; RV64-NEXT:    fsh ft0, 0(sp)
+; RV64-NEXT:    beqz a0, .LBB7_4
 ; RV64-NEXT:  .LBB7_2:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v8, 15
+; RV64-NEXT:    j .LBB7_5
+; RV64-NEXT:  .LBB7_3:
+; RV64-NEXT:    vsetvli zero, zero, e16,m2,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fsh ft0, 0(sp)
+; RV64-NEXT:    bnez a0, .LBB7_2
+; RV64-NEXT:  .LBB7_4:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 15
+; RV64-NEXT:  .LBB7_5:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 15
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB7_4
-; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB7_4:
-; RV64-NEXT:    fsh ft1, 30(sp)
+; RV64-NEXT:    fsh ft0, 30(sp)
+; RV64-NEXT:    bnez a0, .LBB7_7
+; RV64-NEXT:  # %bb.6:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 14
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 14
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB7_6
-; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB7_6:
-; RV64-NEXT:    fsh ft1, 28(sp)
+; RV64-NEXT:    j .LBB7_8
+; RV64-NEXT:  .LBB7_7:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 13
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 13
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB7_8
-; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    fmv.h ft1, ft0
+; RV64-NEXT:    vslidedown.vi v26, v8, 14
 ; RV64-NEXT:  .LBB7_8:
-; RV64-NEXT:    fsh ft1, 26(sp)
-; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 12
 ; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 12
-; RV64-NEXT:    vfmv.f.s ft1, v26
+; RV64-NEXT:    fsh ft0, 28(sp)
 ; RV64-NEXT:    bnez a0, .LBB7_10
 ; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    fmv.h ft1, ft0
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 13
+; RV64-NEXT:    j .LBB7_11
 ; RV64-NEXT:  .LBB7_10:
-; RV64-NEXT:    fsh ft1, 24(sp)
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 11
+; RV64-NEXT:    vslidedown.vi v26, v8, 13
+; RV64-NEXT:  .LBB7_11:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 11
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB7_12
-; RV64-NEXT:  # %bb.11:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB7_12:
-; RV64-NEXT:    fsh ft1, 22(sp)
+; RV64-NEXT:    fsh ft0, 26(sp)
+; RV64-NEXT:    bnez a0, .LBB7_13
+; RV64-NEXT:  # %bb.12:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 10
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 10
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB7_14
-; RV64-NEXT:  # %bb.13:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB7_14:
-; RV64-NEXT:    fsh ft1, 20(sp)
+; RV64-NEXT:    vslidedown.vi v26, v10, 12
+; RV64-NEXT:    j .LBB7_14
+; RV64-NEXT:  .LBB7_13:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 9
+; RV64-NEXT:    vslidedown.vi v26, v8, 12
+; RV64-NEXT:  .LBB7_14:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 9
-; RV64-NEXT:    vfmv.f.s ft1, v26
+; RV64-NEXT:    fsh ft0, 24(sp)
 ; RV64-NEXT:    bnez a0, .LBB7_16
 ; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    fmv.h ft1, ft0
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 11
+; RV64-NEXT:    j .LBB7_17
 ; RV64-NEXT:  .LBB7_16:
-; RV64-NEXT:    fsh ft1, 18(sp)
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 8
+; RV64-NEXT:    vslidedown.vi v26, v8, 11
+; RV64-NEXT:  .LBB7_17:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 8
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB7_18
-; RV64-NEXT:  # %bb.17:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB7_18:
-; RV64-NEXT:    fsh ft1, 16(sp)
+; RV64-NEXT:    fsh ft0, 22(sp)
+; RV64-NEXT:    bnez a0, .LBB7_19
+; RV64-NEXT:  # %bb.18:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 7
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 7
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB7_20
-; RV64-NEXT:  # %bb.19:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB7_20:
-; RV64-NEXT:    fsh ft1, 14(sp)
+; RV64-NEXT:    vslidedown.vi v26, v10, 10
+; RV64-NEXT:    j .LBB7_20
+; RV64-NEXT:  .LBB7_19:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 6
+; RV64-NEXT:    vslidedown.vi v26, v8, 10
+; RV64-NEXT:  .LBB7_20:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 6
-; RV64-NEXT:    vfmv.f.s ft1, v26
+; RV64-NEXT:    fsh ft0, 20(sp)
 ; RV64-NEXT:    bnez a0, .LBB7_22
 ; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    fmv.h ft1, ft0
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 9
+; RV64-NEXT:    j .LBB7_23
 ; RV64-NEXT:  .LBB7_22:
-; RV64-NEXT:    fsh ft1, 12(sp)
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 5
+; RV64-NEXT:    vslidedown.vi v26, v8, 9
+; RV64-NEXT:  .LBB7_23:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 18(sp)
+; RV64-NEXT:    bnez a0, .LBB7_25
+; RV64-NEXT:  # %bb.24:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 8
+; RV64-NEXT:    j .LBB7_26
+; RV64-NEXT:  .LBB7_25:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v8, 8
+; RV64-NEXT:  .LBB7_26:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 16(sp)
+; RV64-NEXT:    bnez a0, .LBB7_28
+; RV64-NEXT:  # %bb.27:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 7
+; RV64-NEXT:    j .LBB7_29
+; RV64-NEXT:  .LBB7_28:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v8, 7
+; RV64-NEXT:  .LBB7_29:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 14(sp)
+; RV64-NEXT:    bnez a0, .LBB7_31
+; RV64-NEXT:  # %bb.30:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 6
+; RV64-NEXT:    j .LBB7_32
+; RV64-NEXT:  .LBB7_31:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v8, 6
+; RV64-NEXT:  .LBB7_32:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 12(sp)
+; RV64-NEXT:    bnez a0, .LBB7_34
+; RV64-NEXT:  # %bb.33:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 5
+; RV64-NEXT:    j .LBB7_35
+; RV64-NEXT:  .LBB7_34:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 5
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB7_24
-; RV64-NEXT:  # %bb.23:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB7_24:
-; RV64-NEXT:    fsh ft1, 10(sp)
+; RV64-NEXT:  .LBB7_35:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 10(sp)
+; RV64-NEXT:    bnez a0, .LBB7_37
+; RV64-NEXT:  # %bb.36:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 4
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB7_38
+; RV64-NEXT:  .LBB7_37:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 4
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB7_26
-; RV64-NEXT:  # %bb.25:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB7_26:
-; RV64-NEXT:    fsh ft1, 8(sp)
+; RV64-NEXT:  .LBB7_38:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 8(sp)
+; RV64-NEXT:    bnez a0, .LBB7_40
+; RV64-NEXT:  # %bb.39:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 3
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB7_41
+; RV64-NEXT:  .LBB7_40:
+; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 3
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB7_28
-; RV64-NEXT:  # %bb.27:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB7_28:
-; RV64-NEXT:    fsh ft1, 6(sp)
+; RV64-NEXT:  .LBB7_41:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 6(sp)
+; RV64-NEXT:    bnez a0, .LBB7_43
+; RV64-NEXT:  # %bb.42:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 2
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 2
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB7_30
-; RV64-NEXT:  # %bb.29:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB7_30:
-; RV64-NEXT:    fsh ft1, 4(sp)
+; RV64-NEXT:    j .LBB7_44
+; RV64-NEXT:  .LBB7_43:
 ; RV64-NEXT:    vsetivli a1, 1, e16,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 1
+; RV64-NEXT:    vslidedown.vi v26, v8, 2
+; RV64-NEXT:  .LBB7_44:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 4(sp)
+; RV64-NEXT:    bnez a0, .LBB7_46
+; RV64-NEXT:  # %bb.45:
+; RV64-NEXT:    vsetivli a0, 1, e16,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 1
+; RV64-NEXT:    j .LBB7_47
+; RV64-NEXT:  .LBB7_46:
+; RV64-NEXT:    vsetivli a0, 1, e16,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 1
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB7_32
-; RV64-NEXT:  # %bb.31:
-; RV64-NEXT:    fmv.h ft1, ft0
-; RV64-NEXT:  .LBB7_32:
-; RV64-NEXT:    fsh ft1, 2(sp)
+; RV64-NEXT:  .LBB7_47:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsh ft0, 2(sp)
 ; RV64-NEXT:    vsetivli a0, 16, e16,m2,ta,mu
 ; RV64-NEXT:    vle16.v v8, (sp)
 ; RV64-NEXT:    addi sp, s0, -64
@@ -1069,20 +1171,22 @@ define <16 x half> @selectcc_v16f16(half %a, half %b, <16 x half> %c, <16 x half
 define <2 x float> @select_v2f32(i1 zeroext %c, <2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: select_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vfmv.f.s ft1, v9
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    vfmv.f.s ft3, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:    vfmv.f.s ft2, v25
 ; CHECK-NEXT:    bnez a0, .LBB8_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fmv.s ft0, ft1
-; CHECK-NEXT:    fmv.s ft2, ft3
+; CHECK-NEXT:    vsetvli zero, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v9
+; CHECK-NEXT:    vsetivli a0, 1, e32,mf2,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    j .LBB8_3
 ; CHECK-NEXT:  .LBB8_2:
+; CHECK-NEXT:    vsetvli zero, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    vsetivli a0, 1, e32,mf2,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v8, 1
+; CHECK-NEXT:  .LBB8_3:
+; CHECK-NEXT:    vfmv.f.s ft1, v25
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
-; CHECK-NEXT:    vfmv.v.f v8, ft2
+; CHECK-NEXT:    vfmv.v.f v8, ft1
 ; CHECK-NEXT:    vfmv.s.f v8, ft0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <2 x float> %a, <2 x float> %b
@@ -1093,23 +1197,27 @@ define <2 x float> @selectcc_v2f32(float %a, float %b, <2 x float> %c, <2 x floa
 ; CHECK-LABEL: selectcc_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    feq.s a0, fa0, fa1
+; CHECK-NEXT:    bnez a0, .LBB9_2
+; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    vsetivli a1, 1, e32,mf2,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    vfmv.f.s ft1, v25
+; CHECK-NEXT:    j .LBB9_3
+; CHECK-NEXT:  .LBB9_2:
+; CHECK-NEXT:    vsetivli a1, 1, e32,mf2,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 1
+; CHECK-NEXT:  .LBB9_3:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    bnez a0, .LBB9_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fmv.s ft0, ft1
-; CHECK-NEXT:  .LBB9_2:
 ; CHECK-NEXT:    vsetivli a1, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
-; CHECK-NEXT:    vfmv.f.s ft1, v9
+; CHECK-NEXT:    bnez a0, .LBB9_5
+; CHECK-NEXT:  # %bb.4:
+; CHECK-NEXT:    vsetvli zero, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v9
+; CHECK-NEXT:    j .LBB9_6
+; CHECK-NEXT:  .LBB9_5:
+; CHECK-NEXT:    vsetvli zero, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    bnez a0, .LBB9_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    fmv.s ft0, ft1
-; CHECK-NEXT:  .LBB9_4:
+; CHECK-NEXT:  .LBB9_6:
 ; CHECK-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfmv.s.f v25, ft0
 ; CHECK-NEXT:    vmv1r.v v8, v25
@@ -1124,44 +1232,49 @@ define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %a, <4 x float> %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vfmv.f.s ft1, v9
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    bnez a0, .LBB10_2
+; CHECK-NEXT:    bnez a0, .LBB10_3
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fmv.s ft0, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v9
+; CHECK-NEXT:    fsw ft0, 0(sp)
+; CHECK-NEXT:    beqz a0, .LBB10_4
 ; CHECK-NEXT:  .LBB10_2:
+; CHECK-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v8, 3
+; CHECK-NEXT:    j .LBB10_5
+; CHECK-NEXT:  .LBB10_3:
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v8
 ; CHECK-NEXT:    fsw ft0, 0(sp)
+; CHECK-NEXT:    bnez a0, .LBB10_2
+; CHECK-NEXT:  .LBB10_4:
 ; CHECK-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 3
+; CHECK-NEXT:  .LBB10_5:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB10_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    fmv.s ft1, ft0
-; CHECK-NEXT:  .LBB10_4:
-; CHECK-NEXT:    fsw ft1, 12(sp)
+; CHECK-NEXT:    fsw ft0, 12(sp)
+; CHECK-NEXT:    bnez a0, .LBB10_7
+; CHECK-NEXT:  # %bb.6:
 ; CHECK-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 2
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 2
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB10_6
-; CHECK-NEXT:  # %bb.5:
-; CHECK-NEXT:    fmv.s ft1, ft0
-; CHECK-NEXT:  .LBB10_6:
-; CHECK-NEXT:    fsw ft1, 8(sp)
+; CHECK-NEXT:    j .LBB10_8
+; CHECK-NEXT:  .LBB10_7:
 ; CHECK-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    vslidedown.vi v25, v8, 2
+; CHECK-NEXT:  .LBB10_8:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsw ft0, 8(sp)
+; CHECK-NEXT:    bnez a0, .LBB10_10
+; CHECK-NEXT:  # %bb.9:
+; CHECK-NEXT:    vsetivli a0, 1, e32,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    j .LBB10_11
+; CHECK-NEXT:  .LBB10_10:
+; CHECK-NEXT:    vsetivli a0, 1, e32,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB10_8
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    fmv.s ft1, ft0
-; CHECK-NEXT:  .LBB10_8:
-; CHECK-NEXT:    fsw ft1, 4(sp)
+; CHECK-NEXT:  .LBB10_11:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsw ft0, 4(sp)
 ; CHECK-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
 ; CHECK-NEXT:    vle32.v v8, (sp)
 ; CHECK-NEXT:    addi sp, sp, 16
@@ -1176,44 +1289,49 @@ define <4 x float> @selectcc_v4f32(float %a, float %b, <4 x float> %c, <4 x floa
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    feq.s a0, fa0, fa1
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vfmv.f.s ft1, v9
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    bnez a0, .LBB11_2
+; CHECK-NEXT:    bnez a0, .LBB11_3
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fmv.s ft0, ft1
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v9
+; CHECK-NEXT:    fsw ft0, 0(sp)
+; CHECK-NEXT:    beqz a0, .LBB11_4
 ; CHECK-NEXT:  .LBB11_2:
+; CHECK-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v8, 3
+; CHECK-NEXT:    j .LBB11_5
+; CHECK-NEXT:  .LBB11_3:
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v8
 ; CHECK-NEXT:    fsw ft0, 0(sp)
+; CHECK-NEXT:    bnez a0, .LBB11_2
+; CHECK-NEXT:  .LBB11_4:
 ; CHECK-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 3
+; CHECK-NEXT:  .LBB11_5:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB11_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    fmv.s ft1, ft0
-; CHECK-NEXT:  .LBB11_4:
-; CHECK-NEXT:    fsw ft1, 12(sp)
+; CHECK-NEXT:    fsw ft0, 12(sp)
+; CHECK-NEXT:    bnez a0, .LBB11_7
+; CHECK-NEXT:  # %bb.6:
 ; CHECK-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 2
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 2
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB11_6
-; CHECK-NEXT:  # %bb.5:
-; CHECK-NEXT:    fmv.s ft1, ft0
-; CHECK-NEXT:  .LBB11_6:
-; CHECK-NEXT:    fsw ft1, 8(sp)
+; CHECK-NEXT:    j .LBB11_8
+; CHECK-NEXT:  .LBB11_7:
 ; CHECK-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    vslidedown.vi v25, v8, 2
+; CHECK-NEXT:  .LBB11_8:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsw ft0, 8(sp)
+; CHECK-NEXT:    bnez a0, .LBB11_10
+; CHECK-NEXT:  # %bb.9:
+; CHECK-NEXT:    vsetivli a0, 1, e32,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    j .LBB11_11
+; CHECK-NEXT:  .LBB11_10:
+; CHECK-NEXT:    vsetivli a0, 1, e32,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    bnez a0, .LBB11_8
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    fmv.s ft1, ft0
-; CHECK-NEXT:  .LBB11_8:
-; CHECK-NEXT:    fsw ft1, 4(sp)
+; CHECK-NEXT:  .LBB11_11:
+; CHECK-NEXT:    vfmv.f.s ft0, v25
+; CHECK-NEXT:    fsw ft0, 4(sp)
 ; CHECK-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
 ; CHECK-NEXT:    vle32.v v8, (sp)
 ; CHECK-NEXT:    addi sp, sp, 16
@@ -1235,84 +1353,93 @@ define <8 x float> @select_v8f32(i1 zeroext %c, <8 x float> %a, <8 x float> %b)
 ; RV32-NEXT:    addi s0, sp, 64
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    andi sp, sp, -32
-; RV32-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
-; RV32-NEXT:    vfmv.f.s ft1, v10
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    bnez a0, .LBB12_2
+; RV32-NEXT:    bnez a0, .LBB12_3
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fmv.s ft0, ft1
+; RV32-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v10
+; RV32-NEXT:    fsw ft0, 0(sp)
+; RV32-NEXT:    beqz a0, .LBB12_4
 ; RV32-NEXT:  .LBB12_2:
+; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v8, 7
+; RV32-NEXT:    j .LBB12_5
+; RV32-NEXT:  .LBB12_3:
+; RV32-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fsw ft0, 0(sp)
+; RV32-NEXT:    bnez a0, .LBB12_2
+; RV32-NEXT:  .LBB12_4:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 7
+; RV32-NEXT:  .LBB12_5:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 7
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB12_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB12_4:
-; RV32-NEXT:    fsw ft1, 28(sp)
+; RV32-NEXT:    fsw ft0, 28(sp)
+; RV32-NEXT:    bnez a0, .LBB12_7
+; RV32-NEXT:  # %bb.6:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 6
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB12_8
+; RV32-NEXT:  .LBB12_7:
+; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 6
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB12_6
-; RV32-NEXT:  # %bb.5:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB12_6:
-; RV32-NEXT:    fsw ft1, 24(sp)
+; RV32-NEXT:  .LBB12_8:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsw ft0, 24(sp)
+; RV32-NEXT:    bnez a0, .LBB12_10
+; RV32-NEXT:  # %bb.9:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 5
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB12_11
+; RV32-NEXT:  .LBB12_10:
+; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 5
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB12_8
-; RV32-NEXT:  # %bb.7:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB12_8:
-; RV32-NEXT:    fsw ft1, 20(sp)
+; RV32-NEXT:  .LBB12_11:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsw ft0, 20(sp)
+; RV32-NEXT:    bnez a0, .LBB12_13
+; RV32-NEXT:  # %bb.12:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 4
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB12_14
+; RV32-NEXT:  .LBB12_13:
+; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 4
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB12_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB12_10:
-; RV32-NEXT:    fsw ft1, 16(sp)
+; RV32-NEXT:  .LBB12_14:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsw ft0, 16(sp)
+; RV32-NEXT:    bnez a0, .LBB12_16
+; RV32-NEXT:  # %bb.15:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 3
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB12_17
+; RV32-NEXT:  .LBB12_16:
+; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 3
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB12_12
-; RV32-NEXT:  # %bb.11:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB12_12:
-; RV32-NEXT:    fsw ft1, 12(sp)
+; RV32-NEXT:  .LBB12_17:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsw ft0, 12(sp)
+; RV32-NEXT:    bnez a0, .LBB12_19
+; RV32-NEXT:  # %bb.18:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 2
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 2
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB12_14
-; RV32-NEXT:  # %bb.13:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB12_14:
-; RV32-NEXT:    fsw ft1, 8(sp)
+; RV32-NEXT:    j .LBB12_20
+; RV32-NEXT:  .LBB12_19:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 1
+; RV32-NEXT:    vslidedown.vi v26, v8, 2
+; RV32-NEXT:  .LBB12_20:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsw ft0, 8(sp)
+; RV32-NEXT:    bnez a0, .LBB12_22
+; RV32-NEXT:  # %bb.21:
+; RV32-NEXT:    vsetivli a0, 1, e32,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 1
+; RV32-NEXT:    j .LBB12_23
+; RV32-NEXT:  .LBB12_22:
+; RV32-NEXT:    vsetivli a0, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 1
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB12_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB12_16:
-; RV32-NEXT:    fsw ft1, 4(sp)
+; RV32-NEXT:  .LBB12_23:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsw ft0, 4(sp)
 ; RV32-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vle32.v v8, (sp)
 ; RV32-NEXT:    addi sp, s0, -64
@@ -1332,84 +1459,93 @@ define <8 x float> @select_v8f32(i1 zeroext %c, <8 x float> %a, <8 x float> %b)
 ; RV64-NEXT:    addi s0, sp, 64
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -32
-; RV64-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
-; RV64-NEXT:    vfmv.f.s ft1, v10
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    bnez a0, .LBB12_2
+; RV64-NEXT:    bnez a0, .LBB12_3
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fmv.s ft0, ft1
+; RV64-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v10
+; RV64-NEXT:    fsw ft0, 0(sp)
+; RV64-NEXT:    beqz a0, .LBB12_4
 ; RV64-NEXT:  .LBB12_2:
+; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v8, 7
+; RV64-NEXT:    j .LBB12_5
+; RV64-NEXT:  .LBB12_3:
+; RV64-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fsw ft0, 0(sp)
+; RV64-NEXT:    bnez a0, .LBB12_2
+; RV64-NEXT:  .LBB12_4:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 7
+; RV64-NEXT:  .LBB12_5:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 7
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB12_4
-; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB12_4:
-; RV64-NEXT:    fsw ft1, 28(sp)
+; RV64-NEXT:    fsw ft0, 28(sp)
+; RV64-NEXT:    bnez a0, .LBB12_7
+; RV64-NEXT:  # %bb.6:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 6
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB12_8
+; RV64-NEXT:  .LBB12_7:
+; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 6
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB12_6
-; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB12_6:
-; RV64-NEXT:    fsw ft1, 24(sp)
+; RV64-NEXT:  .LBB12_8:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsw ft0, 24(sp)
+; RV64-NEXT:    bnez a0, .LBB12_10
+; RV64-NEXT:  # %bb.9:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 5
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB12_11
+; RV64-NEXT:  .LBB12_10:
+; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 5
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB12_8
-; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB12_8:
-; RV64-NEXT:    fsw ft1, 20(sp)
+; RV64-NEXT:  .LBB12_11:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsw ft0, 20(sp)
+; RV64-NEXT:    bnez a0, .LBB12_13
+; RV64-NEXT:  # %bb.12:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 4
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB12_14
+; RV64-NEXT:  .LBB12_13:
+; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 4
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB12_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB12_10:
-; RV64-NEXT:    fsw ft1, 16(sp)
+; RV64-NEXT:  .LBB12_14:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsw ft0, 16(sp)
+; RV64-NEXT:    bnez a0, .LBB12_16
+; RV64-NEXT:  # %bb.15:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 3
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB12_17
+; RV64-NEXT:  .LBB12_16:
+; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 3
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB12_12
-; RV64-NEXT:  # %bb.11:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB12_12:
-; RV64-NEXT:    fsw ft1, 12(sp)
+; RV64-NEXT:  .LBB12_17:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsw ft0, 12(sp)
+; RV64-NEXT:    bnez a0, .LBB12_19
+; RV64-NEXT:  # %bb.18:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 2
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 2
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB12_14
-; RV64-NEXT:  # %bb.13:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB12_14:
-; RV64-NEXT:    fsw ft1, 8(sp)
+; RV64-NEXT:    j .LBB12_20
+; RV64-NEXT:  .LBB12_19:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 1
+; RV64-NEXT:    vslidedown.vi v26, v8, 2
+; RV64-NEXT:  .LBB12_20:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsw ft0, 8(sp)
+; RV64-NEXT:    bnez a0, .LBB12_22
+; RV64-NEXT:  # %bb.21:
+; RV64-NEXT:    vsetivli a0, 1, e32,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 1
+; RV64-NEXT:    j .LBB12_23
+; RV64-NEXT:  .LBB12_22:
+; RV64-NEXT:    vsetivli a0, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 1
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB12_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB12_16:
-; RV64-NEXT:    fsw ft1, 4(sp)
+; RV64-NEXT:  .LBB12_23:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsw ft0, 4(sp)
 ; RV64-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vle32.v v8, (sp)
 ; RV64-NEXT:    addi sp, s0, -64
@@ -1434,84 +1570,93 @@ define <8 x float> @selectcc_v8f32(float %a, float %b, <8 x float> %c, <8 x floa
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    andi sp, sp, -32
 ; RV32-NEXT:    feq.s a0, fa0, fa1
-; RV32-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
-; RV32-NEXT:    vfmv.f.s ft1, v10
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    bnez a0, .LBB13_2
+; RV32-NEXT:    bnez a0, .LBB13_3
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fmv.s ft0, ft1
+; RV32-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v10
+; RV32-NEXT:    fsw ft0, 0(sp)
+; RV32-NEXT:    beqz a0, .LBB13_4
 ; RV32-NEXT:  .LBB13_2:
+; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v8, 7
+; RV32-NEXT:    j .LBB13_5
+; RV32-NEXT:  .LBB13_3:
+; RV32-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fsw ft0, 0(sp)
+; RV32-NEXT:    bnez a0, .LBB13_2
+; RV32-NEXT:  .LBB13_4:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 7
+; RV32-NEXT:  .LBB13_5:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 7
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB13_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB13_4:
-; RV32-NEXT:    fsw ft1, 28(sp)
+; RV32-NEXT:    fsw ft0, 28(sp)
+; RV32-NEXT:    bnez a0, .LBB13_7
+; RV32-NEXT:  # %bb.6:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 6
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB13_8
+; RV32-NEXT:  .LBB13_7:
+; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 6
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB13_6
-; RV32-NEXT:  # %bb.5:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB13_6:
-; RV32-NEXT:    fsw ft1, 24(sp)
+; RV32-NEXT:  .LBB13_8:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsw ft0, 24(sp)
+; RV32-NEXT:    bnez a0, .LBB13_10
+; RV32-NEXT:  # %bb.9:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 5
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB13_11
+; RV32-NEXT:  .LBB13_10:
+; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 5
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB13_8
-; RV32-NEXT:  # %bb.7:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB13_8:
-; RV32-NEXT:    fsw ft1, 20(sp)
+; RV32-NEXT:  .LBB13_11:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsw ft0, 20(sp)
+; RV32-NEXT:    bnez a0, .LBB13_13
+; RV32-NEXT:  # %bb.12:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 4
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB13_14
+; RV32-NEXT:  .LBB13_13:
+; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 4
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB13_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB13_10:
-; RV32-NEXT:    fsw ft1, 16(sp)
+; RV32-NEXT:  .LBB13_14:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsw ft0, 16(sp)
+; RV32-NEXT:    bnez a0, .LBB13_16
+; RV32-NEXT:  # %bb.15:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 3
-; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    j .LBB13_17
+; RV32-NEXT:  .LBB13_16:
+; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 3
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB13_12
-; RV32-NEXT:  # %bb.11:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB13_12:
-; RV32-NEXT:    fsw ft1, 12(sp)
+; RV32-NEXT:  .LBB13_17:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsw ft0, 12(sp)
+; RV32-NEXT:    bnez a0, .LBB13_19
+; RV32-NEXT:  # %bb.18:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 2
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 2
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB13_14
-; RV32-NEXT:  # %bb.13:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB13_14:
-; RV32-NEXT:    fsw ft1, 8(sp)
+; RV32-NEXT:    j .LBB13_20
+; RV32-NEXT:  .LBB13_19:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 1
+; RV32-NEXT:    vslidedown.vi v26, v8, 2
+; RV32-NEXT:  .LBB13_20:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsw ft0, 8(sp)
+; RV32-NEXT:    bnez a0, .LBB13_22
+; RV32-NEXT:  # %bb.21:
+; RV32-NEXT:    vsetivli a0, 1, e32,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 1
+; RV32-NEXT:    j .LBB13_23
+; RV32-NEXT:  .LBB13_22:
+; RV32-NEXT:    vsetivli a0, 1, e32,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 1
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB13_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB13_16:
-; RV32-NEXT:    fsw ft1, 4(sp)
+; RV32-NEXT:  .LBB13_23:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsw ft0, 4(sp)
 ; RV32-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vle32.v v8, (sp)
 ; RV32-NEXT:    addi sp, s0, -64
@@ -1532,84 +1677,93 @@ define <8 x float> @selectcc_v8f32(float %a, float %b, <8 x float> %c, <8 x floa
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -32
 ; RV64-NEXT:    feq.s a0, fa0, fa1
-; RV64-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
-; RV64-NEXT:    vfmv.f.s ft1, v10
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    bnez a0, .LBB13_2
+; RV64-NEXT:    bnez a0, .LBB13_3
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fmv.s ft0, ft1
+; RV64-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v10
+; RV64-NEXT:    fsw ft0, 0(sp)
+; RV64-NEXT:    beqz a0, .LBB13_4
 ; RV64-NEXT:  .LBB13_2:
+; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v8, 7
+; RV64-NEXT:    j .LBB13_5
+; RV64-NEXT:  .LBB13_3:
+; RV64-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fsw ft0, 0(sp)
+; RV64-NEXT:    bnez a0, .LBB13_2
+; RV64-NEXT:  .LBB13_4:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 7
+; RV64-NEXT:  .LBB13_5:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 7
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB13_4
-; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB13_4:
-; RV64-NEXT:    fsw ft1, 28(sp)
+; RV64-NEXT:    fsw ft0, 28(sp)
+; RV64-NEXT:    bnez a0, .LBB13_7
+; RV64-NEXT:  # %bb.6:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 6
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB13_8
+; RV64-NEXT:  .LBB13_7:
+; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 6
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB13_6
-; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB13_6:
-; RV64-NEXT:    fsw ft1, 24(sp)
+; RV64-NEXT:  .LBB13_8:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsw ft0, 24(sp)
+; RV64-NEXT:    bnez a0, .LBB13_10
+; RV64-NEXT:  # %bb.9:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 5
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB13_11
+; RV64-NEXT:  .LBB13_10:
+; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 5
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB13_8
-; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB13_8:
-; RV64-NEXT:    fsw ft1, 20(sp)
+; RV64-NEXT:  .LBB13_11:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsw ft0, 20(sp)
+; RV64-NEXT:    bnez a0, .LBB13_13
+; RV64-NEXT:  # %bb.12:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 4
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB13_14
+; RV64-NEXT:  .LBB13_13:
+; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 4
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB13_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB13_10:
-; RV64-NEXT:    fsw ft1, 16(sp)
+; RV64-NEXT:  .LBB13_14:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsw ft0, 16(sp)
+; RV64-NEXT:    bnez a0, .LBB13_16
+; RV64-NEXT:  # %bb.15:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 3
-; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    j .LBB13_17
+; RV64-NEXT:  .LBB13_16:
+; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 3
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB13_12
-; RV64-NEXT:  # %bb.11:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB13_12:
-; RV64-NEXT:    fsw ft1, 12(sp)
+; RV64-NEXT:  .LBB13_17:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsw ft0, 12(sp)
+; RV64-NEXT:    bnez a0, .LBB13_19
+; RV64-NEXT:  # %bb.18:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 2
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 2
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB13_14
-; RV64-NEXT:  # %bb.13:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB13_14:
-; RV64-NEXT:    fsw ft1, 8(sp)
+; RV64-NEXT:    j .LBB13_20
+; RV64-NEXT:  .LBB13_19:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 1
+; RV64-NEXT:    vslidedown.vi v26, v8, 2
+; RV64-NEXT:  .LBB13_20:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsw ft0, 8(sp)
+; RV64-NEXT:    bnez a0, .LBB13_22
+; RV64-NEXT:  # %bb.21:
+; RV64-NEXT:    vsetivli a0, 1, e32,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 1
+; RV64-NEXT:    j .LBB13_23
+; RV64-NEXT:  .LBB13_22:
+; RV64-NEXT:    vsetivli a0, 1, e32,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 1
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB13_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB13_16:
-; RV64-NEXT:    fsw ft1, 4(sp)
+; RV64-NEXT:  .LBB13_23:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsw ft0, 4(sp)
 ; RV64-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vle32.v v8, (sp)
 ; RV64-NEXT:    addi sp, s0, -64
@@ -1634,164 +1788,181 @@ define <16 x float> @select_v16f32(i1 zeroext %c, <16 x float> %a, <16 x float>
 ; RV32-NEXT:    addi s0, sp, 128
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    andi sp, sp, -64
-; RV32-NEXT:    vsetvli zero, zero, e32,m4,ta,mu
-; RV32-NEXT:    vfmv.f.s ft1, v12
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    bnez a0, .LBB14_2
+; RV32-NEXT:    bnez a0, .LBB14_3
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fmv.s ft0, ft1
+; RV32-NEXT:    vsetvli zero, zero, e32,m4,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v12
+; RV32-NEXT:    fsw ft0, 0(sp)
+; RV32-NEXT:    beqz a0, .LBB14_4
 ; RV32-NEXT:  .LBB14_2:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v8, 15
+; RV32-NEXT:    j .LBB14_5
+; RV32-NEXT:  .LBB14_3:
+; RV32-NEXT:    vsetvli zero, zero, e32,m4,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fsw ft0, 0(sp)
+; RV32-NEXT:    bnez a0, .LBB14_2
+; RV32-NEXT:  .LBB14_4:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 15
+; RV32-NEXT:  .LBB14_5:
 ; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 15
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_4:
-; RV32-NEXT:    fsw ft1, 60(sp)
+; RV32-NEXT:    fsw ft0, 60(sp)
+; RV32-NEXT:    bnez a0, .LBB14_7
+; RV32-NEXT:  # %bb.6:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 14
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB14_8
+; RV32-NEXT:  .LBB14_7:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 14
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_6
-; RV32-NEXT:  # %bb.5:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_6:
-; RV32-NEXT:    fsw ft1, 56(sp)
+; RV32-NEXT:  .LBB14_8:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 56(sp)
+; RV32-NEXT:    bnez a0, .LBB14_10
+; RV32-NEXT:  # %bb.9:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 13
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB14_11
+; RV32-NEXT:  .LBB14_10:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 13
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_8
-; RV32-NEXT:  # %bb.7:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_8:
-; RV32-NEXT:    fsw ft1, 52(sp)
+; RV32-NEXT:  .LBB14_11:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 52(sp)
+; RV32-NEXT:    bnez a0, .LBB14_13
+; RV32-NEXT:  # %bb.12:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 12
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB14_14
+; RV32-NEXT:  .LBB14_13:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 12
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_10:
-; RV32-NEXT:    fsw ft1, 48(sp)
+; RV32-NEXT:  .LBB14_14:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 48(sp)
+; RV32-NEXT:    bnez a0, .LBB14_16
+; RV32-NEXT:  # %bb.15:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 11
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB14_17
+; RV32-NEXT:  .LBB14_16:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 11
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_12
-; RV32-NEXT:  # %bb.11:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_12:
-; RV32-NEXT:    fsw ft1, 44(sp)
+; RV32-NEXT:  .LBB14_17:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 44(sp)
+; RV32-NEXT:    bnez a0, .LBB14_19
+; RV32-NEXT:  # %bb.18:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 10
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB14_20
+; RV32-NEXT:  .LBB14_19:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 10
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_14
-; RV32-NEXT:  # %bb.13:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_14:
-; RV32-NEXT:    fsw ft1, 40(sp)
+; RV32-NEXT:  .LBB14_20:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 40(sp)
+; RV32-NEXT:    bnez a0, .LBB14_22
+; RV32-NEXT:  # %bb.21:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 9
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB14_23
+; RV32-NEXT:  .LBB14_22:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 9
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_16:
-; RV32-NEXT:    fsw ft1, 36(sp)
+; RV32-NEXT:  .LBB14_23:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 36(sp)
+; RV32-NEXT:    bnez a0, .LBB14_25
+; RV32-NEXT:  # %bb.24:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 8
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB14_26
+; RV32-NEXT:  .LBB14_25:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 8
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_18
-; RV32-NEXT:  # %bb.17:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_18:
-; RV32-NEXT:    fsw ft1, 32(sp)
+; RV32-NEXT:  .LBB14_26:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 32(sp)
+; RV32-NEXT:    bnez a0, .LBB14_28
+; RV32-NEXT:  # %bb.27:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 7
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB14_29
+; RV32-NEXT:  .LBB14_28:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 7
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_20
-; RV32-NEXT:  # %bb.19:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_20:
-; RV32-NEXT:    fsw ft1, 28(sp)
+; RV32-NEXT:  .LBB14_29:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 28(sp)
+; RV32-NEXT:    bnez a0, .LBB14_31
+; RV32-NEXT:  # %bb.30:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 6
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB14_32
+; RV32-NEXT:  .LBB14_31:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 6
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_22:
-; RV32-NEXT:    fsw ft1, 24(sp)
+; RV32-NEXT:  .LBB14_32:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 24(sp)
+; RV32-NEXT:    bnez a0, .LBB14_34
+; RV32-NEXT:  # %bb.33:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 5
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB14_35
+; RV32-NEXT:  .LBB14_34:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 5
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_24
-; RV32-NEXT:  # %bb.23:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_24:
-; RV32-NEXT:    fsw ft1, 20(sp)
+; RV32-NEXT:  .LBB14_35:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 20(sp)
+; RV32-NEXT:    bnez a0, .LBB14_37
+; RV32-NEXT:  # %bb.36:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 4
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB14_38
+; RV32-NEXT:  .LBB14_37:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 4
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_26
-; RV32-NEXT:  # %bb.25:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_26:
-; RV32-NEXT:    fsw ft1, 16(sp)
+; RV32-NEXT:  .LBB14_38:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 16(sp)
+; RV32-NEXT:    bnez a0, .LBB14_40
+; RV32-NEXT:  # %bb.39:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 3
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB14_41
+; RV32-NEXT:  .LBB14_40:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 3
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_28
-; RV32-NEXT:  # %bb.27:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_28:
-; RV32-NEXT:    fsw ft1, 12(sp)
+; RV32-NEXT:  .LBB14_41:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 12(sp)
+; RV32-NEXT:    bnez a0, .LBB14_43
+; RV32-NEXT:  # %bb.42:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 2
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 2
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_30
-; RV32-NEXT:  # %bb.29:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_30:
-; RV32-NEXT:    fsw ft1, 8(sp)
+; RV32-NEXT:    j .LBB14_44
+; RV32-NEXT:  .LBB14_43:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 1
+; RV32-NEXT:    vslidedown.vi v28, v8, 2
+; RV32-NEXT:  .LBB14_44:
 ; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 8(sp)
+; RV32-NEXT:    bnez a0, .LBB14_46
+; RV32-NEXT:  # %bb.45:
+; RV32-NEXT:    vsetivli a0, 1, e32,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v12, 1
+; RV32-NEXT:    j .LBB14_47
+; RV32-NEXT:  .LBB14_46:
+; RV32-NEXT:    vsetivli a0, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 1
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB14_32
-; RV32-NEXT:  # %bb.31:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB14_32:
-; RV32-NEXT:    fsw ft1, 4(sp)
+; RV32-NEXT:  .LBB14_47:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 4(sp)
 ; RV32-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; RV32-NEXT:    vle32.v v8, (sp)
 ; RV32-NEXT:    addi sp, s0, -128
@@ -1811,164 +1982,181 @@ define <16 x float> @select_v16f32(i1 zeroext %c, <16 x float> %a, <16 x float>
 ; RV64-NEXT:    addi s0, sp, 128
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -64
-; RV64-NEXT:    vsetvli zero, zero, e32,m4,ta,mu
-; RV64-NEXT:    vfmv.f.s ft1, v12
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    bnez a0, .LBB14_2
+; RV64-NEXT:    bnez a0, .LBB14_3
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fmv.s ft0, ft1
+; RV64-NEXT:    vsetvli zero, zero, e32,m4,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v12
+; RV64-NEXT:    fsw ft0, 0(sp)
+; RV64-NEXT:    beqz a0, .LBB14_4
 ; RV64-NEXT:  .LBB14_2:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT:    vslidedown.vi v28, v8, 15
+; RV64-NEXT:    j .LBB14_5
+; RV64-NEXT:  .LBB14_3:
+; RV64-NEXT:    vsetvli zero, zero, e32,m4,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fsw ft0, 0(sp)
+; RV64-NEXT:    bnez a0, .LBB14_2
+; RV64-NEXT:  .LBB14_4:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 15
+; RV64-NEXT:  .LBB14_5:
 ; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    vslidedown.vi v28, v8, 15
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_4
-; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_4:
-; RV64-NEXT:    fsw ft1, 60(sp)
+; RV64-NEXT:    fsw ft0, 60(sp)
+; RV64-NEXT:    bnez a0, .LBB14_7
+; RV64-NEXT:  # %bb.6:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 14
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB14_8
+; RV64-NEXT:  .LBB14_7:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 14
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_6
-; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_6:
-; RV64-NEXT:    fsw ft1, 56(sp)
+; RV64-NEXT:  .LBB14_8:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 56(sp)
+; RV64-NEXT:    bnez a0, .LBB14_10
+; RV64-NEXT:  # %bb.9:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 13
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB14_11
+; RV64-NEXT:  .LBB14_10:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 13
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_8
-; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_8:
-; RV64-NEXT:    fsw ft1, 52(sp)
+; RV64-NEXT:  .LBB14_11:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 52(sp)
+; RV64-NEXT:    bnez a0, .LBB14_13
+; RV64-NEXT:  # %bb.12:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 12
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB14_14
+; RV64-NEXT:  .LBB14_13:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 12
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_10:
-; RV64-NEXT:    fsw ft1, 48(sp)
+; RV64-NEXT:  .LBB14_14:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 48(sp)
+; RV64-NEXT:    bnez a0, .LBB14_16
+; RV64-NEXT:  # %bb.15:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 11
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB14_17
+; RV64-NEXT:  .LBB14_16:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 11
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_12
-; RV64-NEXT:  # %bb.11:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_12:
-; RV64-NEXT:    fsw ft1, 44(sp)
+; RV64-NEXT:  .LBB14_17:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 44(sp)
+; RV64-NEXT:    bnez a0, .LBB14_19
+; RV64-NEXT:  # %bb.18:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 10
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB14_20
+; RV64-NEXT:  .LBB14_19:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 10
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_14
-; RV64-NEXT:  # %bb.13:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_14:
-; RV64-NEXT:    fsw ft1, 40(sp)
+; RV64-NEXT:  .LBB14_20:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 40(sp)
+; RV64-NEXT:    bnez a0, .LBB14_22
+; RV64-NEXT:  # %bb.21:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 9
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB14_23
+; RV64-NEXT:  .LBB14_22:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 9
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_16:
-; RV64-NEXT:    fsw ft1, 36(sp)
+; RV64-NEXT:  .LBB14_23:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 36(sp)
+; RV64-NEXT:    bnez a0, .LBB14_25
+; RV64-NEXT:  # %bb.24:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 8
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB14_26
+; RV64-NEXT:  .LBB14_25:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 8
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_18
-; RV64-NEXT:  # %bb.17:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_18:
-; RV64-NEXT:    fsw ft1, 32(sp)
+; RV64-NEXT:  .LBB14_26:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 32(sp)
+; RV64-NEXT:    bnez a0, .LBB14_28
+; RV64-NEXT:  # %bb.27:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 7
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB14_29
+; RV64-NEXT:  .LBB14_28:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 7
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_20
-; RV64-NEXT:  # %bb.19:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_20:
-; RV64-NEXT:    fsw ft1, 28(sp)
+; RV64-NEXT:  .LBB14_29:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 28(sp)
+; RV64-NEXT:    bnez a0, .LBB14_31
+; RV64-NEXT:  # %bb.30:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 6
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB14_32
+; RV64-NEXT:  .LBB14_31:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 6
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_22:
-; RV64-NEXT:    fsw ft1, 24(sp)
+; RV64-NEXT:  .LBB14_32:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 24(sp)
+; RV64-NEXT:    bnez a0, .LBB14_34
+; RV64-NEXT:  # %bb.33:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 5
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB14_35
+; RV64-NEXT:  .LBB14_34:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 5
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_24
-; RV64-NEXT:  # %bb.23:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_24:
-; RV64-NEXT:    fsw ft1, 20(sp)
+; RV64-NEXT:  .LBB14_35:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 20(sp)
+; RV64-NEXT:    bnez a0, .LBB14_37
+; RV64-NEXT:  # %bb.36:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 4
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB14_38
+; RV64-NEXT:  .LBB14_37:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 4
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_26
-; RV64-NEXT:  # %bb.25:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_26:
-; RV64-NEXT:    fsw ft1, 16(sp)
+; RV64-NEXT:  .LBB14_38:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 16(sp)
+; RV64-NEXT:    bnez a0, .LBB14_40
+; RV64-NEXT:  # %bb.39:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 3
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB14_41
+; RV64-NEXT:  .LBB14_40:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 3
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_28
-; RV64-NEXT:  # %bb.27:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_28:
-; RV64-NEXT:    fsw ft1, 12(sp)
+; RV64-NEXT:  .LBB14_41:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 12(sp)
+; RV64-NEXT:    bnez a0, .LBB14_43
+; RV64-NEXT:  # %bb.42:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 2
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    vslidedown.vi v28, v8, 2
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_30
-; RV64-NEXT:  # %bb.29:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_30:
-; RV64-NEXT:    fsw ft1, 8(sp)
+; RV64-NEXT:    j .LBB14_44
+; RV64-NEXT:  .LBB14_43:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
-; RV64-NEXT:    vslidedown.vi v28, v12, 1
+; RV64-NEXT:    vslidedown.vi v28, v8, 2
+; RV64-NEXT:  .LBB14_44:
 ; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 8(sp)
+; RV64-NEXT:    bnez a0, .LBB14_46
+; RV64-NEXT:  # %bb.45:
+; RV64-NEXT:    vsetivli a0, 1, e32,m4,ta,mu
+; RV64-NEXT:    vslidedown.vi v28, v12, 1
+; RV64-NEXT:    j .LBB14_47
+; RV64-NEXT:  .LBB14_46:
+; RV64-NEXT:    vsetivli a0, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 1
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB14_32
-; RV64-NEXT:  # %bb.31:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB14_32:
-; RV64-NEXT:    fsw ft1, 4(sp)
+; RV64-NEXT:  .LBB14_47:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 4(sp)
 ; RV64-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; RV64-NEXT:    vle32.v v8, (sp)
 ; RV64-NEXT:    addi sp, s0, -128
@@ -1993,164 +2181,181 @@ define <16 x float> @selectcc_v16f32(float %a, float %b, <16 x float> %c, <16 x
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    andi sp, sp, -64
 ; RV32-NEXT:    feq.s a0, fa0, fa1
-; RV32-NEXT:    vsetvli zero, zero, e32,m4,ta,mu
-; RV32-NEXT:    vfmv.f.s ft1, v12
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    bnez a0, .LBB15_2
+; RV32-NEXT:    bnez a0, .LBB15_3
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fmv.s ft0, ft1
+; RV32-NEXT:    vsetvli zero, zero, e32,m4,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v12
+; RV32-NEXT:    fsw ft0, 0(sp)
+; RV32-NEXT:    beqz a0, .LBB15_4
 ; RV32-NEXT:  .LBB15_2:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v8, 15
+; RV32-NEXT:    j .LBB15_5
+; RV32-NEXT:  .LBB15_3:
+; RV32-NEXT:    vsetvli zero, zero, e32,m4,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fsw ft0, 0(sp)
+; RV32-NEXT:    bnez a0, .LBB15_2
+; RV32-NEXT:  .LBB15_4:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 15
+; RV32-NEXT:  .LBB15_5:
 ; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 15
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB15_4:
-; RV32-NEXT:    fsw ft1, 60(sp)
+; RV32-NEXT:    fsw ft0, 60(sp)
+; RV32-NEXT:    bnez a0, .LBB15_7
+; RV32-NEXT:  # %bb.6:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 14
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 14
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_6
-; RV32-NEXT:  # %bb.5:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB15_6:
-; RV32-NEXT:    fsw ft1, 56(sp)
+; RV32-NEXT:    j .LBB15_8
+; RV32-NEXT:  .LBB15_7:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 13
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 13
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_8
-; RV32-NEXT:  # %bb.7:
-; RV32-NEXT:    fmv.s ft1, ft0
+; RV32-NEXT:    vslidedown.vi v28, v8, 14
 ; RV32-NEXT:  .LBB15_8:
-; RV32-NEXT:    fsw ft1, 52(sp)
-; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 12
 ; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 12
-; RV32-NEXT:    vfmv.f.s ft1, v28
+; RV32-NEXT:    fsw ft0, 56(sp)
 ; RV32-NEXT:    bnez a0, .LBB15_10
 ; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    fmv.s ft1, ft0
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v12, 13
+; RV32-NEXT:    j .LBB15_11
 ; RV32-NEXT:  .LBB15_10:
-; RV32-NEXT:    fsw ft1, 48(sp)
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 11
+; RV32-NEXT:    vslidedown.vi v28, v8, 13
+; RV32-NEXT:  .LBB15_11:
 ; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 11
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_12
-; RV32-NEXT:  # %bb.11:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB15_12:
-; RV32-NEXT:    fsw ft1, 44(sp)
+; RV32-NEXT:    fsw ft0, 52(sp)
+; RV32-NEXT:    bnez a0, .LBB15_13
+; RV32-NEXT:  # %bb.12:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 10
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 10
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_14
-; RV32-NEXT:  # %bb.13:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB15_14:
-; RV32-NEXT:    fsw ft1, 40(sp)
+; RV32-NEXT:    vslidedown.vi v28, v12, 12
+; RV32-NEXT:    j .LBB15_14
+; RV32-NEXT:  .LBB15_13:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 9
+; RV32-NEXT:    vslidedown.vi v28, v8, 12
+; RV32-NEXT:  .LBB15_14:
 ; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 9
-; RV32-NEXT:    vfmv.f.s ft1, v28
+; RV32-NEXT:    fsw ft0, 48(sp)
 ; RV32-NEXT:    bnez a0, .LBB15_16
 ; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    fmv.s ft1, ft0
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v12, 11
+; RV32-NEXT:    j .LBB15_17
 ; RV32-NEXT:  .LBB15_16:
-; RV32-NEXT:    fsw ft1, 36(sp)
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 8
+; RV32-NEXT:    vslidedown.vi v28, v8, 11
+; RV32-NEXT:  .LBB15_17:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 44(sp)
+; RV32-NEXT:    bnez a0, .LBB15_19
+; RV32-NEXT:  # %bb.18:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v12, 10
+; RV32-NEXT:    j .LBB15_20
+; RV32-NEXT:  .LBB15_19:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v8, 10
+; RV32-NEXT:  .LBB15_20:
 ; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 40(sp)
+; RV32-NEXT:    bnez a0, .LBB15_22
+; RV32-NEXT:  # %bb.21:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v12, 9
+; RV32-NEXT:    j .LBB15_23
+; RV32-NEXT:  .LBB15_22:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v8, 9
+; RV32-NEXT:  .LBB15_23:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 36(sp)
+; RV32-NEXT:    bnez a0, .LBB15_25
+; RV32-NEXT:  # %bb.24:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v12, 8
+; RV32-NEXT:    j .LBB15_26
+; RV32-NEXT:  .LBB15_25:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 8
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_18
-; RV32-NEXT:  # %bb.17:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB15_18:
-; RV32-NEXT:    fsw ft1, 32(sp)
+; RV32-NEXT:  .LBB15_26:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 32(sp)
+; RV32-NEXT:    bnez a0, .LBB15_28
+; RV32-NEXT:  # %bb.27:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 7
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB15_29
+; RV32-NEXT:  .LBB15_28:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 7
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_20
-; RV32-NEXT:  # %bb.19:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB15_20:
-; RV32-NEXT:    fsw ft1, 28(sp)
+; RV32-NEXT:  .LBB15_29:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 28(sp)
+; RV32-NEXT:    bnez a0, .LBB15_31
+; RV32-NEXT:  # %bb.30:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 6
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB15_32
+; RV32-NEXT:  .LBB15_31:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 6
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB15_22:
-; RV32-NEXT:    fsw ft1, 24(sp)
+; RV32-NEXT:  .LBB15_32:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 24(sp)
+; RV32-NEXT:    bnez a0, .LBB15_34
+; RV32-NEXT:  # %bb.33:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 5
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB15_35
+; RV32-NEXT:  .LBB15_34:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 5
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_24
-; RV32-NEXT:  # %bb.23:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB15_24:
-; RV32-NEXT:    fsw ft1, 20(sp)
+; RV32-NEXT:  .LBB15_35:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 20(sp)
+; RV32-NEXT:    bnez a0, .LBB15_37
+; RV32-NEXT:  # %bb.36:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 4
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB15_38
+; RV32-NEXT:  .LBB15_37:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 4
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_26
-; RV32-NEXT:  # %bb.25:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB15_26:
-; RV32-NEXT:    fsw ft1, 16(sp)
+; RV32-NEXT:  .LBB15_38:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 16(sp)
+; RV32-NEXT:    bnez a0, .LBB15_40
+; RV32-NEXT:  # %bb.39:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 3
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB15_41
+; RV32-NEXT:  .LBB15_40:
+; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 3
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_28
-; RV32-NEXT:  # %bb.27:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB15_28:
-; RV32-NEXT:    fsw ft1, 12(sp)
+; RV32-NEXT:  .LBB15_41:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 12(sp)
+; RV32-NEXT:    bnez a0, .LBB15_43
+; RV32-NEXT:  # %bb.42:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 2
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 2
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_30
-; RV32-NEXT:  # %bb.29:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB15_30:
-; RV32-NEXT:    fsw ft1, 8(sp)
+; RV32-NEXT:    j .LBB15_44
+; RV32-NEXT:  .LBB15_43:
 ; RV32-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 1
+; RV32-NEXT:    vslidedown.vi v28, v8, 2
+; RV32-NEXT:  .LBB15_44:
 ; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 8(sp)
+; RV32-NEXT:    bnez a0, .LBB15_46
+; RV32-NEXT:  # %bb.45:
+; RV32-NEXT:    vsetivli a0, 1, e32,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v12, 1
+; RV32-NEXT:    j .LBB15_47
+; RV32-NEXT:  .LBB15_46:
+; RV32-NEXT:    vsetivli a0, 1, e32,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 1
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB15_32
-; RV32-NEXT:  # %bb.31:
-; RV32-NEXT:    fmv.s ft1, ft0
-; RV32-NEXT:  .LBB15_32:
-; RV32-NEXT:    fsw ft1, 4(sp)
+; RV32-NEXT:  .LBB15_47:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsw ft0, 4(sp)
 ; RV32-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; RV32-NEXT:    vle32.v v8, (sp)
 ; RV32-NEXT:    addi sp, s0, -128
@@ -2171,164 +2376,181 @@ define <16 x float> @selectcc_v16f32(float %a, float %b, <16 x float> %c, <16 x
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -64
 ; RV64-NEXT:    feq.s a0, fa0, fa1
-; RV64-NEXT:    vsetvli zero, zero, e32,m4,ta,mu
-; RV64-NEXT:    vfmv.f.s ft1, v12
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    bnez a0, .LBB15_2
+; RV64-NEXT:    bnez a0, .LBB15_3
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fmv.s ft0, ft1
+; RV64-NEXT:    vsetvli zero, zero, e32,m4,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v12
+; RV64-NEXT:    fsw ft0, 0(sp)
+; RV64-NEXT:    beqz a0, .LBB15_4
 ; RV64-NEXT:  .LBB15_2:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
+; RV64-NEXT:    vslidedown.vi v28, v8, 15
+; RV64-NEXT:    j .LBB15_5
+; RV64-NEXT:  .LBB15_3:
+; RV64-NEXT:    vsetvli zero, zero, e32,m4,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fsw ft0, 0(sp)
+; RV64-NEXT:    bnez a0, .LBB15_2
+; RV64-NEXT:  .LBB15_4:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 15
+; RV64-NEXT:  .LBB15_5:
 ; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    vslidedown.vi v28, v8, 15
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_4
-; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_4:
-; RV64-NEXT:    fsw ft1, 60(sp)
+; RV64-NEXT:    fsw ft0, 60(sp)
+; RV64-NEXT:    bnez a0, .LBB15_7
+; RV64-NEXT:  # %bb.6:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 14
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB15_8
+; RV64-NEXT:  .LBB15_7:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 14
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_6
-; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_6:
-; RV64-NEXT:    fsw ft1, 56(sp)
+; RV64-NEXT:  .LBB15_8:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 56(sp)
+; RV64-NEXT:    bnez a0, .LBB15_10
+; RV64-NEXT:  # %bb.9:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 13
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB15_11
+; RV64-NEXT:  .LBB15_10:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 13
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_8
-; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_8:
-; RV64-NEXT:    fsw ft1, 52(sp)
+; RV64-NEXT:  .LBB15_11:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 52(sp)
+; RV64-NEXT:    bnez a0, .LBB15_13
+; RV64-NEXT:  # %bb.12:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 12
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB15_14
+; RV64-NEXT:  .LBB15_13:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 12
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_10:
-; RV64-NEXT:    fsw ft1, 48(sp)
+; RV64-NEXT:  .LBB15_14:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 48(sp)
+; RV64-NEXT:    bnez a0, .LBB15_16
+; RV64-NEXT:  # %bb.15:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 11
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB15_17
+; RV64-NEXT:  .LBB15_16:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 11
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_12
-; RV64-NEXT:  # %bb.11:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_12:
-; RV64-NEXT:    fsw ft1, 44(sp)
+; RV64-NEXT:  .LBB15_17:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 44(sp)
+; RV64-NEXT:    bnez a0, .LBB15_19
+; RV64-NEXT:  # %bb.18:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 10
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB15_20
+; RV64-NEXT:  .LBB15_19:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 10
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_14
-; RV64-NEXT:  # %bb.13:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_14:
-; RV64-NEXT:    fsw ft1, 40(sp)
+; RV64-NEXT:  .LBB15_20:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 40(sp)
+; RV64-NEXT:    bnez a0, .LBB15_22
+; RV64-NEXT:  # %bb.21:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 9
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB15_23
+; RV64-NEXT:  .LBB15_22:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 9
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_16:
-; RV64-NEXT:    fsw ft1, 36(sp)
+; RV64-NEXT:  .LBB15_23:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 36(sp)
+; RV64-NEXT:    bnez a0, .LBB15_25
+; RV64-NEXT:  # %bb.24:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 8
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB15_26
+; RV64-NEXT:  .LBB15_25:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 8
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_18
-; RV64-NEXT:  # %bb.17:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_18:
-; RV64-NEXT:    fsw ft1, 32(sp)
+; RV64-NEXT:  .LBB15_26:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 32(sp)
+; RV64-NEXT:    bnez a0, .LBB15_28
+; RV64-NEXT:  # %bb.27:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 7
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB15_29
+; RV64-NEXT:  .LBB15_28:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 7
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_20
-; RV64-NEXT:  # %bb.19:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_20:
-; RV64-NEXT:    fsw ft1, 28(sp)
+; RV64-NEXT:  .LBB15_29:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 28(sp)
+; RV64-NEXT:    bnez a0, .LBB15_31
+; RV64-NEXT:  # %bb.30:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 6
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB15_32
+; RV64-NEXT:  .LBB15_31:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 6
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_22:
-; RV64-NEXT:    fsw ft1, 24(sp)
+; RV64-NEXT:  .LBB15_32:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 24(sp)
+; RV64-NEXT:    bnez a0, .LBB15_34
+; RV64-NEXT:  # %bb.33:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 5
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB15_35
+; RV64-NEXT:  .LBB15_34:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 5
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_24
-; RV64-NEXT:  # %bb.23:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_24:
-; RV64-NEXT:    fsw ft1, 20(sp)
+; RV64-NEXT:  .LBB15_35:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 20(sp)
+; RV64-NEXT:    bnez a0, .LBB15_37
+; RV64-NEXT:  # %bb.36:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 4
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB15_38
+; RV64-NEXT:  .LBB15_37:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 4
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_26
-; RV64-NEXT:  # %bb.25:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_26:
-; RV64-NEXT:    fsw ft1, 16(sp)
+; RV64-NEXT:  .LBB15_38:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 16(sp)
+; RV64-NEXT:    bnez a0, .LBB15_40
+; RV64-NEXT:  # %bb.39:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 3
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB15_41
+; RV64-NEXT:  .LBB15_40:
+; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 3
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_28
-; RV64-NEXT:  # %bb.27:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_28:
-; RV64-NEXT:    fsw ft1, 12(sp)
+; RV64-NEXT:  .LBB15_41:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 12(sp)
+; RV64-NEXT:    bnez a0, .LBB15_43
+; RV64-NEXT:  # %bb.42:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 2
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    vslidedown.vi v28, v8, 2
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_30
-; RV64-NEXT:  # %bb.29:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_30:
-; RV64-NEXT:    fsw ft1, 8(sp)
+; RV64-NEXT:    j .LBB15_44
+; RV64-NEXT:  .LBB15_43:
 ; RV64-NEXT:    vsetivli a1, 1, e32,m4,ta,mu
-; RV64-NEXT:    vslidedown.vi v28, v12, 1
+; RV64-NEXT:    vslidedown.vi v28, v8, 2
+; RV64-NEXT:  .LBB15_44:
 ; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 8(sp)
+; RV64-NEXT:    bnez a0, .LBB15_46
+; RV64-NEXT:  # %bb.45:
+; RV64-NEXT:    vsetivli a0, 1, e32,m4,ta,mu
+; RV64-NEXT:    vslidedown.vi v28, v12, 1
+; RV64-NEXT:    j .LBB15_47
+; RV64-NEXT:  .LBB15_46:
+; RV64-NEXT:    vsetivli a0, 1, e32,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 1
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB15_32
-; RV64-NEXT:  # %bb.31:
-; RV64-NEXT:    fmv.s ft1, ft0
-; RV64-NEXT:  .LBB15_32:
-; RV64-NEXT:    fsw ft1, 4(sp)
+; RV64-NEXT:  .LBB15_47:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsw ft0, 4(sp)
 ; RV64-NEXT:    vsetivli a0, 16, e32,m4,ta,mu
 ; RV64-NEXT:    vle32.v v8, (sp)
 ; RV64-NEXT:    addi sp, s0, -128
@@ -2344,20 +2566,22 @@ define <16 x float> @selectcc_v16f32(float %a, float %b, <16 x float> %c, <16 x
 define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: select_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
-; CHECK-NEXT:    vfmv.f.s ft1, v9
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    vfmv.f.s ft3, v25
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:    vfmv.f.s ft2, v25
 ; CHECK-NEXT:    bnez a0, .LBB16_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fmv.d ft0, ft1
-; CHECK-NEXT:    fmv.d ft2, ft3
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v9
+; CHECK-NEXT:    vsetivli a0, 1, e64,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v9, 1
+; CHECK-NEXT:    j .LBB16_3
 ; CHECK-NEXT:  .LBB16_2:
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    vsetivli a0, 1, e64,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v8, 1
+; CHECK-NEXT:  .LBB16_3:
+; CHECK-NEXT:    vfmv.f.s ft1, v25
 ; CHECK-NEXT:    vsetivli a0, 2, e64,m1,ta,mu
-; CHECK-NEXT:    vfmv.v.f v8, ft2
+; CHECK-NEXT:    vfmv.v.f v8, ft1
 ; CHECK-NEXT:    vfmv.s.f v8, ft0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <2 x double> %a, <2 x double> %b
@@ -2368,23 +2592,27 @@ define <2 x double> @selectcc_v2f64(double %a, double %b, <2 x double> %c, <2 x
 ; CHECK-LABEL: selectcc_v2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    feq.d a0, fa0, fa1
+; CHECK-NEXT:    bnez a0, .LBB17_2
+; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    vfmv.f.s ft1, v25
+; CHECK-NEXT:    j .LBB17_3
+; CHECK-NEXT:  .LBB17_2:
+; CHECK-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 1
+; CHECK-NEXT:  .LBB17_3:
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    bnez a0, .LBB17_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fmv.d ft0, ft1
-; CHECK-NEXT:  .LBB17_2:
 ; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
-; CHECK-NEXT:    vfmv.f.s ft1, v9
+; CHECK-NEXT:    bnez a0, .LBB17_5
+; CHECK-NEXT:  # %bb.4:
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v9
+; CHECK-NEXT:    j .LBB17_6
+; CHECK-NEXT:  .LBB17_5:
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    bnez a0, .LBB17_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    fmv.d ft0, ft1
-; CHECK-NEXT:  .LBB17_4:
+; CHECK-NEXT:  .LBB17_6:
 ; CHECK-NEXT:    vsetivli a0, 2, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.s.f v25, ft0
 ; CHECK-NEXT:    vmv1r.v v8, v25
@@ -2406,44 +2634,49 @@ define <4 x double> @select_v4f64(i1 zeroext %c, <4 x double> %a, <4 x double> %
 ; RV32-NEXT:    addi s0, sp, 64
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    andi sp, sp, -32
-; RV32-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
-; RV32-NEXT:    vfmv.f.s ft1, v10
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    bnez a0, .LBB18_2
+; RV32-NEXT:    bnez a0, .LBB18_3
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fmv.d ft0, ft1
+; RV32-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v10
+; RV32-NEXT:    fsd ft0, 0(sp)
+; RV32-NEXT:    beqz a0, .LBB18_4
 ; RV32-NEXT:  .LBB18_2:
+; RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v8, 3
+; RV32-NEXT:    j .LBB18_5
+; RV32-NEXT:  .LBB18_3:
+; RV32-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fsd ft0, 0(sp)
+; RV32-NEXT:    bnez a0, .LBB18_2
+; RV32-NEXT:  .LBB18_4:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 3
+; RV32-NEXT:  .LBB18_5:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 3
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB18_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB18_4:
-; RV32-NEXT:    fsd ft1, 24(sp)
+; RV32-NEXT:    fsd ft0, 24(sp)
+; RV32-NEXT:    bnez a0, .LBB18_7
+; RV32-NEXT:  # %bb.6:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 2
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 2
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB18_6
-; RV32-NEXT:  # %bb.5:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB18_6:
-; RV32-NEXT:    fsd ft1, 16(sp)
+; RV32-NEXT:    j .LBB18_8
+; RV32-NEXT:  .LBB18_7:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 1
+; RV32-NEXT:    vslidedown.vi v26, v8, 2
+; RV32-NEXT:  .LBB18_8:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsd ft0, 16(sp)
+; RV32-NEXT:    bnez a0, .LBB18_10
+; RV32-NEXT:  # %bb.9:
+; RV32-NEXT:    vsetivli a0, 1, e64,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 1
+; RV32-NEXT:    j .LBB18_11
+; RV32-NEXT:  .LBB18_10:
+; RV32-NEXT:    vsetivli a0, 1, e64,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 1
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB18_8
-; RV32-NEXT:  # %bb.7:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB18_8:
-; RV32-NEXT:    fsd ft1, 8(sp)
+; RV32-NEXT:  .LBB18_11:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsd ft0, 8(sp)
 ; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vle64.v v8, (sp)
 ; RV32-NEXT:    addi sp, s0, -64
@@ -2463,44 +2696,49 @@ define <4 x double> @select_v4f64(i1 zeroext %c, <4 x double> %a, <4 x double> %
 ; RV64-NEXT:    addi s0, sp, 64
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -32
-; RV64-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
-; RV64-NEXT:    vfmv.f.s ft1, v10
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    bnez a0, .LBB18_2
+; RV64-NEXT:    bnez a0, .LBB18_3
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fmv.d ft0, ft1
+; RV64-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v10
+; RV64-NEXT:    fsd ft0, 0(sp)
+; RV64-NEXT:    beqz a0, .LBB18_4
 ; RV64-NEXT:  .LBB18_2:
+; RV64-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v8, 3
+; RV64-NEXT:    j .LBB18_5
+; RV64-NEXT:  .LBB18_3:
+; RV64-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fsd ft0, 0(sp)
+; RV64-NEXT:    bnez a0, .LBB18_2
+; RV64-NEXT:  .LBB18_4:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 3
+; RV64-NEXT:  .LBB18_5:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 3
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB18_4
-; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB18_4:
-; RV64-NEXT:    fsd ft1, 24(sp)
+; RV64-NEXT:    fsd ft0, 24(sp)
+; RV64-NEXT:    bnez a0, .LBB18_7
+; RV64-NEXT:  # %bb.6:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 2
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 2
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB18_6
-; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB18_6:
-; RV64-NEXT:    fsd ft1, 16(sp)
+; RV64-NEXT:    j .LBB18_8
+; RV64-NEXT:  .LBB18_7:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 1
+; RV64-NEXT:    vslidedown.vi v26, v8, 2
+; RV64-NEXT:  .LBB18_8:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsd ft0, 16(sp)
+; RV64-NEXT:    bnez a0, .LBB18_10
+; RV64-NEXT:  # %bb.9:
+; RV64-NEXT:    vsetivli a0, 1, e64,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 1
+; RV64-NEXT:    j .LBB18_11
+; RV64-NEXT:  .LBB18_10:
+; RV64-NEXT:    vsetivli a0, 1, e64,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 1
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB18_8
-; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB18_8:
-; RV64-NEXT:    fsd ft1, 8(sp)
+; RV64-NEXT:  .LBB18_11:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsd ft0, 8(sp)
 ; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vle64.v v8, (sp)
 ; RV64-NEXT:    addi sp, s0, -64
@@ -2525,44 +2763,49 @@ define <4 x double> @selectcc_v4f64(double %a, double %b, <4 x double> %c, <4 x
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    andi sp, sp, -32
 ; RV32-NEXT:    feq.d a0, fa0, fa1
-; RV32-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
-; RV32-NEXT:    vfmv.f.s ft1, v10
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    bnez a0, .LBB19_2
+; RV32-NEXT:    bnez a0, .LBB19_3
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fmv.d ft0, ft1
+; RV32-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v10
+; RV32-NEXT:    fsd ft0, 0(sp)
+; RV32-NEXT:    beqz a0, .LBB19_4
 ; RV32-NEXT:  .LBB19_2:
+; RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v8, 3
+; RV32-NEXT:    j .LBB19_5
+; RV32-NEXT:  .LBB19_3:
+; RV32-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fsd ft0, 0(sp)
+; RV32-NEXT:    bnez a0, .LBB19_2
+; RV32-NEXT:  .LBB19_4:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 3
+; RV32-NEXT:  .LBB19_5:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 3
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB19_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB19_4:
-; RV32-NEXT:    fsd ft1, 24(sp)
+; RV32-NEXT:    fsd ft0, 24(sp)
+; RV32-NEXT:    bnez a0, .LBB19_7
+; RV32-NEXT:  # %bb.6:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v10, 2
-; RV32-NEXT:    vfmv.f.s ft0, v26
-; RV32-NEXT:    vslidedown.vi v26, v8, 2
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB19_6
-; RV32-NEXT:  # %bb.5:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB19_6:
-; RV32-NEXT:    fsd ft1, 16(sp)
+; RV32-NEXT:    j .LBB19_8
+; RV32-NEXT:  .LBB19_7:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
-; RV32-NEXT:    vslidedown.vi v26, v10, 1
+; RV32-NEXT:    vslidedown.vi v26, v8, 2
+; RV32-NEXT:  .LBB19_8:
 ; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsd ft0, 16(sp)
+; RV32-NEXT:    bnez a0, .LBB19_10
+; RV32-NEXT:  # %bb.9:
+; RV32-NEXT:    vsetivli a0, 1, e64,m2,ta,mu
+; RV32-NEXT:    vslidedown.vi v26, v10, 1
+; RV32-NEXT:    j .LBB19_11
+; RV32-NEXT:  .LBB19_10:
+; RV32-NEXT:    vsetivli a0, 1, e64,m2,ta,mu
 ; RV32-NEXT:    vslidedown.vi v26, v8, 1
-; RV32-NEXT:    vfmv.f.s ft1, v26
-; RV32-NEXT:    bnez a0, .LBB19_8
-; RV32-NEXT:  # %bb.7:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB19_8:
-; RV32-NEXT:    fsd ft1, 8(sp)
+; RV32-NEXT:  .LBB19_11:
+; RV32-NEXT:    vfmv.f.s ft0, v26
+; RV32-NEXT:    fsd ft0, 8(sp)
 ; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vle64.v v8, (sp)
 ; RV32-NEXT:    addi sp, s0, -64
@@ -2583,44 +2826,49 @@ define <4 x double> @selectcc_v4f64(double %a, double %b, <4 x double> %c, <4 x
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -32
 ; RV64-NEXT:    feq.d a0, fa0, fa1
-; RV64-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
-; RV64-NEXT:    vfmv.f.s ft1, v10
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    bnez a0, .LBB19_2
+; RV64-NEXT:    bnez a0, .LBB19_3
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fmv.d ft0, ft1
+; RV64-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v10
+; RV64-NEXT:    fsd ft0, 0(sp)
+; RV64-NEXT:    beqz a0, .LBB19_4
 ; RV64-NEXT:  .LBB19_2:
+; RV64-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v8, 3
+; RV64-NEXT:    j .LBB19_5
+; RV64-NEXT:  .LBB19_3:
+; RV64-NEXT:    vsetvli zero, zero, e64,m2,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fsd ft0, 0(sp)
+; RV64-NEXT:    bnez a0, .LBB19_2
+; RV64-NEXT:  .LBB19_4:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 3
+; RV64-NEXT:  .LBB19_5:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 3
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB19_4
-; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB19_4:
-; RV64-NEXT:    fsd ft1, 24(sp)
+; RV64-NEXT:    fsd ft0, 24(sp)
+; RV64-NEXT:    bnez a0, .LBB19_7
+; RV64-NEXT:  # %bb.6:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v10, 2
-; RV64-NEXT:    vfmv.f.s ft0, v26
-; RV64-NEXT:    vslidedown.vi v26, v8, 2
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB19_6
-; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB19_6:
-; RV64-NEXT:    fsd ft1, 16(sp)
+; RV64-NEXT:    j .LBB19_8
+; RV64-NEXT:  .LBB19_7:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m2,ta,mu
-; RV64-NEXT:    vslidedown.vi v26, v10, 1
+; RV64-NEXT:    vslidedown.vi v26, v8, 2
+; RV64-NEXT:  .LBB19_8:
 ; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsd ft0, 16(sp)
+; RV64-NEXT:    bnez a0, .LBB19_10
+; RV64-NEXT:  # %bb.9:
+; RV64-NEXT:    vsetivli a0, 1, e64,m2,ta,mu
+; RV64-NEXT:    vslidedown.vi v26, v10, 1
+; RV64-NEXT:    j .LBB19_11
+; RV64-NEXT:  .LBB19_10:
+; RV64-NEXT:    vsetivli a0, 1, e64,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 1
-; RV64-NEXT:    vfmv.f.s ft1, v26
-; RV64-NEXT:    bnez a0, .LBB19_8
-; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB19_8:
-; RV64-NEXT:    fsd ft1, 8(sp)
+; RV64-NEXT:  .LBB19_11:
+; RV64-NEXT:    vfmv.f.s ft0, v26
+; RV64-NEXT:    fsd ft0, 8(sp)
 ; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vle64.v v8, (sp)
 ; RV64-NEXT:    addi sp, s0, -64
@@ -2645,84 +2893,93 @@ define <8 x double> @select_v8f64(i1 zeroext %c, <8 x double> %a, <8 x double> %
 ; RV32-NEXT:    addi s0, sp, 128
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    andi sp, sp, -64
-; RV32-NEXT:    vsetvli zero, zero, e64,m4,ta,mu
-; RV32-NEXT:    vfmv.f.s ft1, v12
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    bnez a0, .LBB20_2
+; RV32-NEXT:    bnez a0, .LBB20_3
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fmv.d ft0, ft1
+; RV32-NEXT:    vsetvli zero, zero, e64,m4,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v12
+; RV32-NEXT:    fsd ft0, 0(sp)
+; RV32-NEXT:    beqz a0, .LBB20_4
 ; RV32-NEXT:  .LBB20_2:
+; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v8, 7
+; RV32-NEXT:    j .LBB20_5
+; RV32-NEXT:  .LBB20_3:
+; RV32-NEXT:    vsetvli zero, zero, e64,m4,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fsd ft0, 0(sp)
+; RV32-NEXT:    bnez a0, .LBB20_2
+; RV32-NEXT:  .LBB20_4:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 7
+; RV32-NEXT:  .LBB20_5:
 ; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 7
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB20_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB20_4:
-; RV32-NEXT:    fsd ft1, 56(sp)
+; RV32-NEXT:    fsd ft0, 56(sp)
+; RV32-NEXT:    bnez a0, .LBB20_7
+; RV32-NEXT:  # %bb.6:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 6
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB20_8
+; RV32-NEXT:  .LBB20_7:
+; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 6
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB20_6
-; RV32-NEXT:  # %bb.5:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB20_6:
-; RV32-NEXT:    fsd ft1, 48(sp)
+; RV32-NEXT:  .LBB20_8:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsd ft0, 48(sp)
+; RV32-NEXT:    bnez a0, .LBB20_10
+; RV32-NEXT:  # %bb.9:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 5
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB20_11
+; RV32-NEXT:  .LBB20_10:
+; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 5
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB20_8
-; RV32-NEXT:  # %bb.7:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB20_8:
-; RV32-NEXT:    fsd ft1, 40(sp)
+; RV32-NEXT:  .LBB20_11:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsd ft0, 40(sp)
+; RV32-NEXT:    bnez a0, .LBB20_13
+; RV32-NEXT:  # %bb.12:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 4
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB20_14
+; RV32-NEXT:  .LBB20_13:
+; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 4
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB20_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB20_10:
-; RV32-NEXT:    fsd ft1, 32(sp)
+; RV32-NEXT:  .LBB20_14:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsd ft0, 32(sp)
+; RV32-NEXT:    bnez a0, .LBB20_16
+; RV32-NEXT:  # %bb.15:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 3
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB20_17
+; RV32-NEXT:  .LBB20_16:
+; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 3
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB20_12
-; RV32-NEXT:  # %bb.11:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB20_12:
-; RV32-NEXT:    fsd ft1, 24(sp)
+; RV32-NEXT:  .LBB20_17:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsd ft0, 24(sp)
+; RV32-NEXT:    bnez a0, .LBB20_19
+; RV32-NEXT:  # %bb.18:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 2
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 2
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB20_14
-; RV32-NEXT:  # %bb.13:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB20_14:
-; RV32-NEXT:    fsd ft1, 16(sp)
+; RV32-NEXT:    j .LBB20_20
+; RV32-NEXT:  .LBB20_19:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 1
+; RV32-NEXT:    vslidedown.vi v28, v8, 2
+; RV32-NEXT:  .LBB20_20:
 ; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsd ft0, 16(sp)
+; RV32-NEXT:    bnez a0, .LBB20_22
+; RV32-NEXT:  # %bb.21:
+; RV32-NEXT:    vsetivli a0, 1, e64,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v12, 1
+; RV32-NEXT:    j .LBB20_23
+; RV32-NEXT:  .LBB20_22:
+; RV32-NEXT:    vsetivli a0, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 1
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB20_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB20_16:
-; RV32-NEXT:    fsd ft1, 8(sp)
+; RV32-NEXT:  .LBB20_23:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsd ft0, 8(sp)
 ; RV32-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vle64.v v8, (sp)
 ; RV32-NEXT:    addi sp, s0, -128
@@ -2742,84 +2999,93 @@ define <8 x double> @select_v8f64(i1 zeroext %c, <8 x double> %a, <8 x double> %
 ; RV64-NEXT:    addi s0, sp, 128
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -64
-; RV64-NEXT:    vsetvli zero, zero, e64,m4,ta,mu
-; RV64-NEXT:    vfmv.f.s ft1, v12
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    bnez a0, .LBB20_2
+; RV64-NEXT:    bnez a0, .LBB20_3
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fmv.d ft0, ft1
+; RV64-NEXT:    vsetvli zero, zero, e64,m4,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v12
+; RV64-NEXT:    fsd ft0, 0(sp)
+; RV64-NEXT:    beqz a0, .LBB20_4
 ; RV64-NEXT:  .LBB20_2:
+; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT:    vslidedown.vi v28, v8, 7
+; RV64-NEXT:    j .LBB20_5
+; RV64-NEXT:  .LBB20_3:
+; RV64-NEXT:    vsetvli zero, zero, e64,m4,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fsd ft0, 0(sp)
+; RV64-NEXT:    bnez a0, .LBB20_2
+; RV64-NEXT:  .LBB20_4:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 7
+; RV64-NEXT:  .LBB20_5:
 ; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    vslidedown.vi v28, v8, 7
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB20_4
-; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB20_4:
-; RV64-NEXT:    fsd ft1, 56(sp)
+; RV64-NEXT:    fsd ft0, 56(sp)
+; RV64-NEXT:    bnez a0, .LBB20_7
+; RV64-NEXT:  # %bb.6:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 6
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB20_8
+; RV64-NEXT:  .LBB20_7:
+; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 6
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB20_6
-; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB20_6:
-; RV64-NEXT:    fsd ft1, 48(sp)
+; RV64-NEXT:  .LBB20_8:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsd ft0, 48(sp)
+; RV64-NEXT:    bnez a0, .LBB20_10
+; RV64-NEXT:  # %bb.9:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 5
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB20_11
+; RV64-NEXT:  .LBB20_10:
+; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 5
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB20_8
-; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB20_8:
-; RV64-NEXT:    fsd ft1, 40(sp)
+; RV64-NEXT:  .LBB20_11:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsd ft0, 40(sp)
+; RV64-NEXT:    bnez a0, .LBB20_13
+; RV64-NEXT:  # %bb.12:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 4
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB20_14
+; RV64-NEXT:  .LBB20_13:
+; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 4
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB20_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB20_10:
-; RV64-NEXT:    fsd ft1, 32(sp)
+; RV64-NEXT:  .LBB20_14:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsd ft0, 32(sp)
+; RV64-NEXT:    bnez a0, .LBB20_16
+; RV64-NEXT:  # %bb.15:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 3
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB20_17
+; RV64-NEXT:  .LBB20_16:
+; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 3
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB20_12
-; RV64-NEXT:  # %bb.11:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB20_12:
-; RV64-NEXT:    fsd ft1, 24(sp)
+; RV64-NEXT:  .LBB20_17:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsd ft0, 24(sp)
+; RV64-NEXT:    bnez a0, .LBB20_19
+; RV64-NEXT:  # %bb.18:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 2
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    vslidedown.vi v28, v8, 2
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB20_14
-; RV64-NEXT:  # %bb.13:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB20_14:
-; RV64-NEXT:    fsd ft1, 16(sp)
+; RV64-NEXT:    j .LBB20_20
+; RV64-NEXT:  .LBB20_19:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
-; RV64-NEXT:    vslidedown.vi v28, v12, 1
+; RV64-NEXT:    vslidedown.vi v28, v8, 2
+; RV64-NEXT:  .LBB20_20:
 ; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsd ft0, 16(sp)
+; RV64-NEXT:    bnez a0, .LBB20_22
+; RV64-NEXT:  # %bb.21:
+; RV64-NEXT:    vsetivli a0, 1, e64,m4,ta,mu
+; RV64-NEXT:    vslidedown.vi v28, v12, 1
+; RV64-NEXT:    j .LBB20_23
+; RV64-NEXT:  .LBB20_22:
+; RV64-NEXT:    vsetivli a0, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 1
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB20_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB20_16:
-; RV64-NEXT:    fsd ft1, 8(sp)
+; RV64-NEXT:  .LBB20_23:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsd ft0, 8(sp)
 ; RV64-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vle64.v v8, (sp)
 ; RV64-NEXT:    addi sp, s0, -128
@@ -2844,84 +3110,93 @@ define <8 x double> @selectcc_v8f64(double %a, double %b, <8 x double> %c, <8 x
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    andi sp, sp, -64
 ; RV32-NEXT:    feq.d a0, fa0, fa1
-; RV32-NEXT:    vsetvli zero, zero, e64,m4,ta,mu
-; RV32-NEXT:    vfmv.f.s ft1, v12
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    bnez a0, .LBB21_2
+; RV32-NEXT:    bnez a0, .LBB21_3
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fmv.d ft0, ft1
+; RV32-NEXT:    vsetvli zero, zero, e64,m4,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v12
+; RV32-NEXT:    fsd ft0, 0(sp)
+; RV32-NEXT:    beqz a0, .LBB21_4
 ; RV32-NEXT:  .LBB21_2:
+; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v8, 7
+; RV32-NEXT:    j .LBB21_5
+; RV32-NEXT:  .LBB21_3:
+; RV32-NEXT:    vsetvli zero, zero, e64,m4,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fsd ft0, 0(sp)
+; RV32-NEXT:    bnez a0, .LBB21_2
+; RV32-NEXT:  .LBB21_4:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 7
+; RV32-NEXT:  .LBB21_5:
 ; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 7
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB21_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB21_4:
-; RV32-NEXT:    fsd ft1, 56(sp)
+; RV32-NEXT:    fsd ft0, 56(sp)
+; RV32-NEXT:    bnez a0, .LBB21_7
+; RV32-NEXT:  # %bb.6:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 6
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB21_8
+; RV32-NEXT:  .LBB21_7:
+; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 6
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB21_6
-; RV32-NEXT:  # %bb.5:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB21_6:
-; RV32-NEXT:    fsd ft1, 48(sp)
+; RV32-NEXT:  .LBB21_8:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsd ft0, 48(sp)
+; RV32-NEXT:    bnez a0, .LBB21_10
+; RV32-NEXT:  # %bb.9:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 5
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB21_11
+; RV32-NEXT:  .LBB21_10:
+; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 5
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB21_8
-; RV32-NEXT:  # %bb.7:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB21_8:
-; RV32-NEXT:    fsd ft1, 40(sp)
+; RV32-NEXT:  .LBB21_11:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsd ft0, 40(sp)
+; RV32-NEXT:    bnez a0, .LBB21_13
+; RV32-NEXT:  # %bb.12:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 4
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB21_14
+; RV32-NEXT:  .LBB21_13:
+; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 4
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB21_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB21_10:
-; RV32-NEXT:    fsd ft1, 32(sp)
+; RV32-NEXT:  .LBB21_14:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsd ft0, 32(sp)
+; RV32-NEXT:    bnez a0, .LBB21_16
+; RV32-NEXT:  # %bb.15:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 3
-; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    j .LBB21_17
+; RV32-NEXT:  .LBB21_16:
+; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 3
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB21_12
-; RV32-NEXT:  # %bb.11:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB21_12:
-; RV32-NEXT:    fsd ft1, 24(sp)
+; RV32-NEXT:  .LBB21_17:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsd ft0, 24(sp)
+; RV32-NEXT:    bnez a0, .LBB21_19
+; RV32-NEXT:  # %bb.18:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v12, 2
-; RV32-NEXT:    vfmv.f.s ft0, v28
-; RV32-NEXT:    vslidedown.vi v28, v8, 2
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB21_14
-; RV32-NEXT:  # %bb.13:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB21_14:
-; RV32-NEXT:    fsd ft1, 16(sp)
+; RV32-NEXT:    j .LBB21_20
+; RV32-NEXT:  .LBB21_19:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
-; RV32-NEXT:    vslidedown.vi v28, v12, 1
+; RV32-NEXT:    vslidedown.vi v28, v8, 2
+; RV32-NEXT:  .LBB21_20:
 ; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsd ft0, 16(sp)
+; RV32-NEXT:    bnez a0, .LBB21_22
+; RV32-NEXT:  # %bb.21:
+; RV32-NEXT:    vsetivli a0, 1, e64,m4,ta,mu
+; RV32-NEXT:    vslidedown.vi v28, v12, 1
+; RV32-NEXT:    j .LBB21_23
+; RV32-NEXT:  .LBB21_22:
+; RV32-NEXT:    vsetivli a0, 1, e64,m4,ta,mu
 ; RV32-NEXT:    vslidedown.vi v28, v8, 1
-; RV32-NEXT:    vfmv.f.s ft1, v28
-; RV32-NEXT:    bnez a0, .LBB21_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB21_16:
-; RV32-NEXT:    fsd ft1, 8(sp)
+; RV32-NEXT:  .LBB21_23:
+; RV32-NEXT:    vfmv.f.s ft0, v28
+; RV32-NEXT:    fsd ft0, 8(sp)
 ; RV32-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vle64.v v8, (sp)
 ; RV32-NEXT:    addi sp, s0, -128
@@ -2942,84 +3217,93 @@ define <8 x double> @selectcc_v8f64(double %a, double %b, <8 x double> %c, <8 x
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -64
 ; RV64-NEXT:    feq.d a0, fa0, fa1
-; RV64-NEXT:    vsetvli zero, zero, e64,m4,ta,mu
-; RV64-NEXT:    vfmv.f.s ft1, v12
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    bnez a0, .LBB21_2
+; RV64-NEXT:    bnez a0, .LBB21_3
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fmv.d ft0, ft1
+; RV64-NEXT:    vsetvli zero, zero, e64,m4,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v12
+; RV64-NEXT:    fsd ft0, 0(sp)
+; RV64-NEXT:    beqz a0, .LBB21_4
 ; RV64-NEXT:  .LBB21_2:
+; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
+; RV64-NEXT:    vslidedown.vi v28, v8, 7
+; RV64-NEXT:    j .LBB21_5
+; RV64-NEXT:  .LBB21_3:
+; RV64-NEXT:    vsetvli zero, zero, e64,m4,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fsd ft0, 0(sp)
+; RV64-NEXT:    bnez a0, .LBB21_2
+; RV64-NEXT:  .LBB21_4:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 7
+; RV64-NEXT:  .LBB21_5:
 ; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    vslidedown.vi v28, v8, 7
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB21_4
-; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB21_4:
-; RV64-NEXT:    fsd ft1, 56(sp)
+; RV64-NEXT:    fsd ft0, 56(sp)
+; RV64-NEXT:    bnez a0, .LBB21_7
+; RV64-NEXT:  # %bb.6:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 6
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB21_8
+; RV64-NEXT:  .LBB21_7:
+; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 6
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB21_6
-; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB21_6:
-; RV64-NEXT:    fsd ft1, 48(sp)
+; RV64-NEXT:  .LBB21_8:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsd ft0, 48(sp)
+; RV64-NEXT:    bnez a0, .LBB21_10
+; RV64-NEXT:  # %bb.9:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 5
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB21_11
+; RV64-NEXT:  .LBB21_10:
+; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 5
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB21_8
-; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB21_8:
-; RV64-NEXT:    fsd ft1, 40(sp)
+; RV64-NEXT:  .LBB21_11:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsd ft0, 40(sp)
+; RV64-NEXT:    bnez a0, .LBB21_13
+; RV64-NEXT:  # %bb.12:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 4
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB21_14
+; RV64-NEXT:  .LBB21_13:
+; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 4
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB21_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB21_10:
-; RV64-NEXT:    fsd ft1, 32(sp)
+; RV64-NEXT:  .LBB21_14:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsd ft0, 32(sp)
+; RV64-NEXT:    bnez a0, .LBB21_16
+; RV64-NEXT:  # %bb.15:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 3
-; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    j .LBB21_17
+; RV64-NEXT:  .LBB21_16:
+; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 3
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB21_12
-; RV64-NEXT:  # %bb.11:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB21_12:
-; RV64-NEXT:    fsd ft1, 24(sp)
+; RV64-NEXT:  .LBB21_17:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsd ft0, 24(sp)
+; RV64-NEXT:    bnez a0, .LBB21_19
+; RV64-NEXT:  # %bb.18:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v12, 2
-; RV64-NEXT:    vfmv.f.s ft0, v28
-; RV64-NEXT:    vslidedown.vi v28, v8, 2
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB21_14
-; RV64-NEXT:  # %bb.13:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB21_14:
-; RV64-NEXT:    fsd ft1, 16(sp)
+; RV64-NEXT:    j .LBB21_20
+; RV64-NEXT:  .LBB21_19:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m4,ta,mu
-; RV64-NEXT:    vslidedown.vi v28, v12, 1
+; RV64-NEXT:    vslidedown.vi v28, v8, 2
+; RV64-NEXT:  .LBB21_20:
 ; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsd ft0, 16(sp)
+; RV64-NEXT:    bnez a0, .LBB21_22
+; RV64-NEXT:  # %bb.21:
+; RV64-NEXT:    vsetivli a0, 1, e64,m4,ta,mu
+; RV64-NEXT:    vslidedown.vi v28, v12, 1
+; RV64-NEXT:    j .LBB21_23
+; RV64-NEXT:  .LBB21_22:
+; RV64-NEXT:    vsetivli a0, 1, e64,m4,ta,mu
 ; RV64-NEXT:    vslidedown.vi v28, v8, 1
-; RV64-NEXT:    vfmv.f.s ft1, v28
-; RV64-NEXT:    bnez a0, .LBB21_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB21_16:
-; RV64-NEXT:    fsd ft1, 8(sp)
+; RV64-NEXT:  .LBB21_23:
+; RV64-NEXT:    vfmv.f.s ft0, v28
+; RV64-NEXT:    fsd ft0, 8(sp)
 ; RV64-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vle64.v v8, (sp)
 ; RV64-NEXT:    addi sp, s0, -128
@@ -3044,164 +3328,181 @@ define <16 x double> @select_v16f64(i1 zeroext %c, <16 x double> %a, <16 x doubl
 ; RV32-NEXT:    addi s0, sp, 256
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    andi sp, sp, -128
-; RV32-NEXT:    vsetvli zero, zero, e64,m8,ta,mu
-; RV32-NEXT:    vfmv.f.s ft1, v16
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    bnez a0, .LBB22_2
+; RV32-NEXT:    bnez a0, .LBB22_3
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fmv.d ft0, ft1
+; RV32-NEXT:    vsetvli zero, zero, e64,m8,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v16
+; RV32-NEXT:    fsd ft0, 0(sp)
+; RV32-NEXT:    beqz a0, .LBB22_4
 ; RV32-NEXT:  .LBB22_2:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 15
+; RV32-NEXT:    j .LBB22_5
+; RV32-NEXT:  .LBB22_3:
+; RV32-NEXT:    vsetvli zero, zero, e64,m8,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fsd ft0, 0(sp)
+; RV32-NEXT:    bnez a0, .LBB22_2
+; RV32-NEXT:  .LBB22_4:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 15
+; RV32-NEXT:  .LBB22_5:
 ; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 15
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB22_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB22_4:
-; RV32-NEXT:    fsd ft1, 120(sp)
+; RV32-NEXT:    fsd ft0, 120(sp)
+; RV32-NEXT:    bnez a0, .LBB22_7
+; RV32-NEXT:  # %bb.6:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 14
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 14
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB22_6
-; RV32-NEXT:  # %bb.5:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB22_6:
-; RV32-NEXT:    fsd ft1, 112(sp)
+; RV32-NEXT:    j .LBB22_8
+; RV32-NEXT:  .LBB22_7:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 13
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 13
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB22_8
-; RV32-NEXT:  # %bb.7:
-; RV32-NEXT:    fmv.d ft1, ft0
+; RV32-NEXT:    vslidedown.vi v24, v8, 14
 ; RV32-NEXT:  .LBB22_8:
-; RV32-NEXT:    fsd ft1, 104(sp)
-; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 12
 ; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 12
-; RV32-NEXT:    vfmv.f.s ft1, v24
+; RV32-NEXT:    fsd ft0, 112(sp)
 ; RV32-NEXT:    bnez a0, .LBB22_10
 ; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    fmv.d ft1, ft0
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 13
+; RV32-NEXT:    j .LBB22_11
 ; RV32-NEXT:  .LBB22_10:
-; RV32-NEXT:    fsd ft1, 96(sp)
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 11
+; RV32-NEXT:    vslidedown.vi v24, v8, 13
+; RV32-NEXT:  .LBB22_11:
 ; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 11
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB22_12
-; RV32-NEXT:  # %bb.11:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB22_12:
-; RV32-NEXT:    fsd ft1, 88(sp)
+; RV32-NEXT:    fsd ft0, 104(sp)
+; RV32-NEXT:    bnez a0, .LBB22_13
+; RV32-NEXT:  # %bb.12:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 10
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 10
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB22_14
-; RV32-NEXT:  # %bb.13:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB22_14:
-; RV32-NEXT:    fsd ft1, 80(sp)
+; RV32-NEXT:    vslidedown.vi v24, v16, 12
+; RV32-NEXT:    j .LBB22_14
+; RV32-NEXT:  .LBB22_13:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 9
+; RV32-NEXT:    vslidedown.vi v24, v8, 12
+; RV32-NEXT:  .LBB22_14:
 ; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 9
-; RV32-NEXT:    vfmv.f.s ft1, v24
+; RV32-NEXT:    fsd ft0, 96(sp)
 ; RV32-NEXT:    bnez a0, .LBB22_16
 ; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    fmv.d ft1, ft0
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 11
+; RV32-NEXT:    j .LBB22_17
 ; RV32-NEXT:  .LBB22_16:
-; RV32-NEXT:    fsd ft1, 72(sp)
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 8
+; RV32-NEXT:    vslidedown.vi v24, v8, 11
+; RV32-NEXT:  .LBB22_17:
 ; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 8
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB22_18
-; RV32-NEXT:  # %bb.17:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB22_18:
-; RV32-NEXT:    fsd ft1, 64(sp)
+; RV32-NEXT:    fsd ft0, 88(sp)
+; RV32-NEXT:    bnez a0, .LBB22_19
+; RV32-NEXT:  # %bb.18:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 7
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 7
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB22_20
-; RV32-NEXT:  # %bb.19:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB22_20:
-; RV32-NEXT:    fsd ft1, 56(sp)
+; RV32-NEXT:    vslidedown.vi v24, v16, 10
+; RV32-NEXT:    j .LBB22_20
+; RV32-NEXT:  .LBB22_19:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 6
+; RV32-NEXT:    vslidedown.vi v24, v8, 10
+; RV32-NEXT:  .LBB22_20:
 ; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 6
-; RV32-NEXT:    vfmv.f.s ft1, v24
+; RV32-NEXT:    fsd ft0, 80(sp)
 ; RV32-NEXT:    bnez a0, .LBB22_22
 ; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    fmv.d ft1, ft0
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 9
+; RV32-NEXT:    j .LBB22_23
 ; RV32-NEXT:  .LBB22_22:
-; RV32-NEXT:    fsd ft1, 48(sp)
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 5
+; RV32-NEXT:    vslidedown.vi v24, v8, 9
+; RV32-NEXT:  .LBB22_23:
 ; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 5
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB22_24
-; RV32-NEXT:  # %bb.23:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB22_24:
-; RV32-NEXT:    fsd ft1, 40(sp)
+; RV32-NEXT:    fsd ft0, 72(sp)
+; RV32-NEXT:    bnez a0, .LBB22_25
+; RV32-NEXT:  # %bb.24:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 4
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 4
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB22_26
-; RV32-NEXT:  # %bb.25:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB22_26:
-; RV32-NEXT:    fsd ft1, 32(sp)
+; RV32-NEXT:    vslidedown.vi v24, v16, 8
+; RV32-NEXT:    j .LBB22_26
+; RV32-NEXT:  .LBB22_25:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 3
+; RV32-NEXT:    vslidedown.vi v24, v8, 8
+; RV32-NEXT:  .LBB22_26:
 ; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 3
-; RV32-NEXT:    vfmv.f.s ft1, v24
+; RV32-NEXT:    fsd ft0, 64(sp)
 ; RV32-NEXT:    bnez a0, .LBB22_28
 ; RV32-NEXT:  # %bb.27:
-; RV32-NEXT:    fmv.d ft1, ft0
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 7
+; RV32-NEXT:    j .LBB22_29
 ; RV32-NEXT:  .LBB22_28:
-; RV32-NEXT:    fsd ft1, 24(sp)
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v24, v16, 2
+; RV32-NEXT:    vslidedown.vi v24, v8, 7
+; RV32-NEXT:  .LBB22_29:
 ; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 2
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB22_30
-; RV32-NEXT:  # %bb.29:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB22_30:
-; RV32-NEXT:    fsd ft1, 16(sp)
+; RV32-NEXT:    fsd ft0, 56(sp)
+; RV32-NEXT:    bnez a0, .LBB22_31
+; RV32-NEXT:  # %bb.30:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v16, v16, 1
-; RV32-NEXT:    vfmv.f.s ft0, v16
-; RV32-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-NEXT:    vfmv.f.s ft1, v8
-; RV32-NEXT:    bnez a0, .LBB22_32
-; RV32-NEXT:  # %bb.31:
-; RV32-NEXT:    fmv.d ft1, ft0
+; RV32-NEXT:    vslidedown.vi v24, v16, 6
+; RV32-NEXT:    j .LBB22_32
+; RV32-NEXT:  .LBB22_31:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 6
 ; RV32-NEXT:  .LBB22_32:
-; RV32-NEXT:    fsd ft1, 8(sp)
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 48(sp)
+; RV32-NEXT:    bnez a0, .LBB22_34
+; RV32-NEXT:  # %bb.33:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 5
+; RV32-NEXT:    j .LBB22_35
+; RV32-NEXT:  .LBB22_34:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 5
+; RV32-NEXT:  .LBB22_35:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 40(sp)
+; RV32-NEXT:    bnez a0, .LBB22_37
+; RV32-NEXT:  # %bb.36:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 4
+; RV32-NEXT:    j .LBB22_38
+; RV32-NEXT:  .LBB22_37:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 4
+; RV32-NEXT:  .LBB22_38:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 32(sp)
+; RV32-NEXT:    bnez a0, .LBB22_40
+; RV32-NEXT:  # %bb.39:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 3
+; RV32-NEXT:    j .LBB22_41
+; RV32-NEXT:  .LBB22_40:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 3
+; RV32-NEXT:  .LBB22_41:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 24(sp)
+; RV32-NEXT:    bnez a0, .LBB22_43
+; RV32-NEXT:  # %bb.42:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 2
+; RV32-NEXT:    j .LBB22_44
+; RV32-NEXT:  .LBB22_43:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 2
+; RV32-NEXT:  .LBB22_44:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 16(sp)
+; RV32-NEXT:    bnez a0, .LBB22_46
+; RV32-NEXT:  # %bb.45:
+; RV32-NEXT:    vsetivli a0, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v8, v16, 1
+; RV32-NEXT:    j .LBB22_47
+; RV32-NEXT:  .LBB22_46:
+; RV32-NEXT:    vsetivli a0, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v8, v8, 1
+; RV32-NEXT:  .LBB22_47:
+; RV32-NEXT:    vfmv.f.s ft0, v8
+; RV32-NEXT:    fsd ft0, 8(sp)
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vle64.v v8, (sp)
 ; RV32-NEXT:    addi sp, s0, -256
@@ -3221,164 +3522,181 @@ define <16 x double> @select_v16f64(i1 zeroext %c, <16 x double> %a, <16 x doubl
 ; RV64-NEXT:    addi s0, sp, 256
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -128
-; RV64-NEXT:    vsetvli zero, zero, e64,m8,ta,mu
-; RV64-NEXT:    vfmv.f.s ft1, v16
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    bnez a0, .LBB22_2
+; RV64-NEXT:    bnez a0, .LBB22_3
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fmv.d ft0, ft1
+; RV64-NEXT:    vsetvli zero, zero, e64,m8,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v16
+; RV64-NEXT:    fsd ft0, 0(sp)
+; RV64-NEXT:    beqz a0, .LBB22_4
 ; RV64-NEXT:  .LBB22_2:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT:    vslidedown.vi v24, v8, 15
+; RV64-NEXT:    j .LBB22_5
+; RV64-NEXT:  .LBB22_3:
+; RV64-NEXT:    vsetvli zero, zero, e64,m8,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fsd ft0, 0(sp)
+; RV64-NEXT:    bnez a0, .LBB22_2
+; RV64-NEXT:  .LBB22_4:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 15
+; RV64-NEXT:  .LBB22_5:
 ; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    vslidedown.vi v24, v8, 15
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_4
-; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_4:
-; RV64-NEXT:    fsd ft1, 120(sp)
+; RV64-NEXT:    fsd ft0, 120(sp)
+; RV64-NEXT:    bnez a0, .LBB22_7
+; RV64-NEXT:  # %bb.6:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 14
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB22_8
+; RV64-NEXT:  .LBB22_7:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 14
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_6
-; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_6:
-; RV64-NEXT:    fsd ft1, 112(sp)
+; RV64-NEXT:  .LBB22_8:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 112(sp)
+; RV64-NEXT:    bnez a0, .LBB22_10
+; RV64-NEXT:  # %bb.9:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 13
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB22_11
+; RV64-NEXT:  .LBB22_10:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 13
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_8
-; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_8:
-; RV64-NEXT:    fsd ft1, 104(sp)
+; RV64-NEXT:  .LBB22_11:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 104(sp)
+; RV64-NEXT:    bnez a0, .LBB22_13
+; RV64-NEXT:  # %bb.12:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 12
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB22_14
+; RV64-NEXT:  .LBB22_13:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 12
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_10:
-; RV64-NEXT:    fsd ft1, 96(sp)
+; RV64-NEXT:  .LBB22_14:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 96(sp)
+; RV64-NEXT:    bnez a0, .LBB22_16
+; RV64-NEXT:  # %bb.15:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 11
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB22_17
+; RV64-NEXT:  .LBB22_16:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 11
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_12
-; RV64-NEXT:  # %bb.11:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_12:
-; RV64-NEXT:    fsd ft1, 88(sp)
+; RV64-NEXT:  .LBB22_17:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 88(sp)
+; RV64-NEXT:    bnez a0, .LBB22_19
+; RV64-NEXT:  # %bb.18:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 10
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB22_20
+; RV64-NEXT:  .LBB22_19:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 10
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_14
-; RV64-NEXT:  # %bb.13:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_14:
-; RV64-NEXT:    fsd ft1, 80(sp)
+; RV64-NEXT:  .LBB22_20:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 80(sp)
+; RV64-NEXT:    bnez a0, .LBB22_22
+; RV64-NEXT:  # %bb.21:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 9
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB22_23
+; RV64-NEXT:  .LBB22_22:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 9
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_16:
-; RV64-NEXT:    fsd ft1, 72(sp)
+; RV64-NEXT:  .LBB22_23:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 72(sp)
+; RV64-NEXT:    bnez a0, .LBB22_25
+; RV64-NEXT:  # %bb.24:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 8
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB22_26
+; RV64-NEXT:  .LBB22_25:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 8
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_18
-; RV64-NEXT:  # %bb.17:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_18:
-; RV64-NEXT:    fsd ft1, 64(sp)
+; RV64-NEXT:  .LBB22_26:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 64(sp)
+; RV64-NEXT:    bnez a0, .LBB22_28
+; RV64-NEXT:  # %bb.27:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 7
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB22_29
+; RV64-NEXT:  .LBB22_28:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 7
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_20
-; RV64-NEXT:  # %bb.19:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_20:
-; RV64-NEXT:    fsd ft1, 56(sp)
+; RV64-NEXT:  .LBB22_29:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 56(sp)
+; RV64-NEXT:    bnez a0, .LBB22_31
+; RV64-NEXT:  # %bb.30:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 6
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB22_32
+; RV64-NEXT:  .LBB22_31:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 6
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_22:
-; RV64-NEXT:    fsd ft1, 48(sp)
+; RV64-NEXT:  .LBB22_32:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 48(sp)
+; RV64-NEXT:    bnez a0, .LBB22_34
+; RV64-NEXT:  # %bb.33:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 5
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB22_35
+; RV64-NEXT:  .LBB22_34:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 5
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_24
-; RV64-NEXT:  # %bb.23:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_24:
-; RV64-NEXT:    fsd ft1, 40(sp)
+; RV64-NEXT:  .LBB22_35:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 40(sp)
+; RV64-NEXT:    bnez a0, .LBB22_37
+; RV64-NEXT:  # %bb.36:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 4
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB22_38
+; RV64-NEXT:  .LBB22_37:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 4
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_26
-; RV64-NEXT:  # %bb.25:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_26:
-; RV64-NEXT:    fsd ft1, 32(sp)
+; RV64-NEXT:  .LBB22_38:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 32(sp)
+; RV64-NEXT:    bnez a0, .LBB22_40
+; RV64-NEXT:  # %bb.39:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 3
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB22_41
+; RV64-NEXT:  .LBB22_40:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 3
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_28
-; RV64-NEXT:  # %bb.27:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_28:
-; RV64-NEXT:    fsd ft1, 24(sp)
+; RV64-NEXT:  .LBB22_41:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 24(sp)
+; RV64-NEXT:    bnez a0, .LBB22_43
+; RV64-NEXT:  # %bb.42:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 2
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    vslidedown.vi v24, v8, 2
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB22_30
-; RV64-NEXT:  # %bb.29:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_30:
-; RV64-NEXT:    fsd ft1, 16(sp)
+; RV64-NEXT:    j .LBB22_44
+; RV64-NEXT:  .LBB22_43:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV64-NEXT:    vslidedown.vi v16, v16, 1
-; RV64-NEXT:    vfmv.f.s ft0, v16
+; RV64-NEXT:    vslidedown.vi v24, v8, 2
+; RV64-NEXT:  .LBB22_44:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 16(sp)
+; RV64-NEXT:    bnez a0, .LBB22_46
+; RV64-NEXT:  # %bb.45:
+; RV64-NEXT:    vsetivli a0, 1, e64,m8,ta,mu
+; RV64-NEXT:    vslidedown.vi v8, v16, 1
+; RV64-NEXT:    j .LBB22_47
+; RV64-NEXT:  .LBB22_46:
+; RV64-NEXT:    vsetivli a0, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v8, v8, 1
-; RV64-NEXT:    vfmv.f.s ft1, v8
-; RV64-NEXT:    bnez a0, .LBB22_32
-; RV64-NEXT:  # %bb.31:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB22_32:
-; RV64-NEXT:    fsd ft1, 8(sp)
+; RV64-NEXT:  .LBB22_47:
+; RV64-NEXT:    vfmv.f.s ft0, v8
+; RV64-NEXT:    fsd ft0, 8(sp)
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vle64.v v8, (sp)
 ; RV64-NEXT:    addi sp, s0, -256
@@ -3403,164 +3721,181 @@ define <16 x double> @selectcc_v16f64(double %a, double %b, <16 x double> %c, <1
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    andi sp, sp, -128
 ; RV32-NEXT:    feq.d a0, fa0, fa1
-; RV32-NEXT:    vsetvli zero, zero, e64,m8,ta,mu
-; RV32-NEXT:    vfmv.f.s ft1, v16
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    bnez a0, .LBB23_2
+; RV32-NEXT:    bnez a0, .LBB23_3
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fmv.d ft0, ft1
+; RV32-NEXT:    vsetvli zero, zero, e64,m8,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v16
+; RV32-NEXT:    fsd ft0, 0(sp)
+; RV32-NEXT:    beqz a0, .LBB23_4
 ; RV32-NEXT:  .LBB23_2:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 15
+; RV32-NEXT:    j .LBB23_5
+; RV32-NEXT:  .LBB23_3:
+; RV32-NEXT:    vsetvli zero, zero, e64,m8,ta,mu
+; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fsd ft0, 0(sp)
+; RV32-NEXT:    bnez a0, .LBB23_2
+; RV32-NEXT:  .LBB23_4:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 15
+; RV32-NEXT:  .LBB23_5:
 ; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 15
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_4:
-; RV32-NEXT:    fsd ft1, 120(sp)
+; RV32-NEXT:    fsd ft0, 120(sp)
+; RV32-NEXT:    bnez a0, .LBB23_7
+; RV32-NEXT:  # %bb.6:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 14
-; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    j .LBB23_8
+; RV32-NEXT:  .LBB23_7:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v8, 14
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_6
-; RV32-NEXT:  # %bb.5:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_6:
-; RV32-NEXT:    fsd ft1, 112(sp)
+; RV32-NEXT:  .LBB23_8:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 112(sp)
+; RV32-NEXT:    bnez a0, .LBB23_10
+; RV32-NEXT:  # %bb.9:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 13
-; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    j .LBB23_11
+; RV32-NEXT:  .LBB23_10:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v8, 13
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_8
-; RV32-NEXT:  # %bb.7:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_8:
-; RV32-NEXT:    fsd ft1, 104(sp)
+; RV32-NEXT:  .LBB23_11:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 104(sp)
+; RV32-NEXT:    bnez a0, .LBB23_13
+; RV32-NEXT:  # %bb.12:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 12
-; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    j .LBB23_14
+; RV32-NEXT:  .LBB23_13:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v8, 12
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_10
-; RV32-NEXT:  # %bb.9:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_10:
-; RV32-NEXT:    fsd ft1, 96(sp)
+; RV32-NEXT:  .LBB23_14:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 96(sp)
+; RV32-NEXT:    bnez a0, .LBB23_16
+; RV32-NEXT:  # %bb.15:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 11
-; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    j .LBB23_17
+; RV32-NEXT:  .LBB23_16:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v8, 11
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_12
-; RV32-NEXT:  # %bb.11:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_12:
-; RV32-NEXT:    fsd ft1, 88(sp)
+; RV32-NEXT:  .LBB23_17:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 88(sp)
+; RV32-NEXT:    bnez a0, .LBB23_19
+; RV32-NEXT:  # %bb.18:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 10
-; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    j .LBB23_20
+; RV32-NEXT:  .LBB23_19:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v8, 10
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_14
-; RV32-NEXT:  # %bb.13:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_14:
-; RV32-NEXT:    fsd ft1, 80(sp)
+; RV32-NEXT:  .LBB23_20:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 80(sp)
+; RV32-NEXT:    bnez a0, .LBB23_22
+; RV32-NEXT:  # %bb.21:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 9
-; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    j .LBB23_23
+; RV32-NEXT:  .LBB23_22:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v8, 9
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_16
-; RV32-NEXT:  # %bb.15:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_16:
-; RV32-NEXT:    fsd ft1, 72(sp)
+; RV32-NEXT:  .LBB23_23:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 72(sp)
+; RV32-NEXT:    bnez a0, .LBB23_25
+; RV32-NEXT:  # %bb.24:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 8
-; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    j .LBB23_26
+; RV32-NEXT:  .LBB23_25:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v8, 8
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_18
-; RV32-NEXT:  # %bb.17:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_18:
-; RV32-NEXT:    fsd ft1, 64(sp)
+; RV32-NEXT:  .LBB23_26:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 64(sp)
+; RV32-NEXT:    bnez a0, .LBB23_28
+; RV32-NEXT:  # %bb.27:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 7
-; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    j .LBB23_29
+; RV32-NEXT:  .LBB23_28:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v8, 7
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_20
-; RV32-NEXT:  # %bb.19:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_20:
-; RV32-NEXT:    fsd ft1, 56(sp)
+; RV32-NEXT:  .LBB23_29:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 56(sp)
+; RV32-NEXT:    bnez a0, .LBB23_31
+; RV32-NEXT:  # %bb.30:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 6
-; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    j .LBB23_32
+; RV32-NEXT:  .LBB23_31:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v8, 6
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_22
-; RV32-NEXT:  # %bb.21:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_22:
-; RV32-NEXT:    fsd ft1, 48(sp)
+; RV32-NEXT:  .LBB23_32:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 48(sp)
+; RV32-NEXT:    bnez a0, .LBB23_34
+; RV32-NEXT:  # %bb.33:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 5
-; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    j .LBB23_35
+; RV32-NEXT:  .LBB23_34:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v8, 5
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_24
-; RV32-NEXT:  # %bb.23:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_24:
-; RV32-NEXT:    fsd ft1, 40(sp)
+; RV32-NEXT:  .LBB23_35:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 40(sp)
+; RV32-NEXT:    bnez a0, .LBB23_37
+; RV32-NEXT:  # %bb.36:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 4
-; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    j .LBB23_38
+; RV32-NEXT:  .LBB23_37:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v8, 4
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_26
-; RV32-NEXT:  # %bb.25:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_26:
-; RV32-NEXT:    fsd ft1, 32(sp)
+; RV32-NEXT:  .LBB23_38:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 32(sp)
+; RV32-NEXT:    bnez a0, .LBB23_40
+; RV32-NEXT:  # %bb.39:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 3
-; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    j .LBB23_41
+; RV32-NEXT:  .LBB23_40:
+; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v8, 3
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_28
-; RV32-NEXT:  # %bb.27:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_28:
-; RV32-NEXT:    fsd ft1, 24(sp)
+; RV32-NEXT:  .LBB23_41:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 24(sp)
+; RV32-NEXT:    bnez a0, .LBB23_43
+; RV32-NEXT:  # %bb.42:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v24, v16, 2
-; RV32-NEXT:    vfmv.f.s ft0, v24
-; RV32-NEXT:    vslidedown.vi v24, v8, 2
-; RV32-NEXT:    vfmv.f.s ft1, v24
-; RV32-NEXT:    bnez a0, .LBB23_30
-; RV32-NEXT:  # %bb.29:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_30:
-; RV32-NEXT:    fsd ft1, 16(sp)
+; RV32-NEXT:    j .LBB23_44
+; RV32-NEXT:  .LBB23_43:
 ; RV32-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV32-NEXT:    vslidedown.vi v16, v16, 1
-; RV32-NEXT:    vfmv.f.s ft0, v16
+; RV32-NEXT:    vslidedown.vi v24, v8, 2
+; RV32-NEXT:  .LBB23_44:
+; RV32-NEXT:    vfmv.f.s ft0, v24
+; RV32-NEXT:    fsd ft0, 16(sp)
+; RV32-NEXT:    bnez a0, .LBB23_46
+; RV32-NEXT:  # %bb.45:
+; RV32-NEXT:    vsetivli a0, 1, e64,m8,ta,mu
+; RV32-NEXT:    vslidedown.vi v8, v16, 1
+; RV32-NEXT:    j .LBB23_47
+; RV32-NEXT:  .LBB23_46:
+; RV32-NEXT:    vsetivli a0, 1, e64,m8,ta,mu
 ; RV32-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-NEXT:    vfmv.f.s ft1, v8
-; RV32-NEXT:    bnez a0, .LBB23_32
-; RV32-NEXT:  # %bb.31:
-; RV32-NEXT:    fmv.d ft1, ft0
-; RV32-NEXT:  .LBB23_32:
-; RV32-NEXT:    fsd ft1, 8(sp)
+; RV32-NEXT:  .LBB23_47:
+; RV32-NEXT:    vfmv.f.s ft0, v8
+; RV32-NEXT:    fsd ft0, 8(sp)
 ; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV32-NEXT:    vle64.v v8, (sp)
 ; RV32-NEXT:    addi sp, s0, -256
@@ -3581,164 +3916,181 @@ define <16 x double> @selectcc_v16f64(double %a, double %b, <16 x double> %c, <1
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -128
 ; RV64-NEXT:    feq.d a0, fa0, fa1
-; RV64-NEXT:    vsetvli zero, zero, e64,m8,ta,mu
-; RV64-NEXT:    vfmv.f.s ft1, v16
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    bnez a0, .LBB23_2
+; RV64-NEXT:    bnez a0, .LBB23_3
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fmv.d ft0, ft1
+; RV64-NEXT:    vsetvli zero, zero, e64,m8,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v16
+; RV64-NEXT:    fsd ft0, 0(sp)
+; RV64-NEXT:    beqz a0, .LBB23_4
 ; RV64-NEXT:  .LBB23_2:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
+; RV64-NEXT:    vslidedown.vi v24, v8, 15
+; RV64-NEXT:    j .LBB23_5
+; RV64-NEXT:  .LBB23_3:
+; RV64-NEXT:    vsetvli zero, zero, e64,m8,ta,mu
+; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fsd ft0, 0(sp)
+; RV64-NEXT:    bnez a0, .LBB23_2
+; RV64-NEXT:  .LBB23_4:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 15
+; RV64-NEXT:  .LBB23_5:
 ; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    vslidedown.vi v24, v8, 15
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_4
-; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_4:
-; RV64-NEXT:    fsd ft1, 120(sp)
+; RV64-NEXT:    fsd ft0, 120(sp)
+; RV64-NEXT:    bnez a0, .LBB23_7
+; RV64-NEXT:  # %bb.6:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 14
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB23_8
+; RV64-NEXT:  .LBB23_7:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 14
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_6
-; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_6:
-; RV64-NEXT:    fsd ft1, 112(sp)
+; RV64-NEXT:  .LBB23_8:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 112(sp)
+; RV64-NEXT:    bnez a0, .LBB23_10
+; RV64-NEXT:  # %bb.9:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 13
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB23_11
+; RV64-NEXT:  .LBB23_10:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 13
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_8
-; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_8:
-; RV64-NEXT:    fsd ft1, 104(sp)
+; RV64-NEXT:  .LBB23_11:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 104(sp)
+; RV64-NEXT:    bnez a0, .LBB23_13
+; RV64-NEXT:  # %bb.12:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 12
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB23_14
+; RV64-NEXT:  .LBB23_13:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 12
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_10
-; RV64-NEXT:  # %bb.9:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_10:
-; RV64-NEXT:    fsd ft1, 96(sp)
+; RV64-NEXT:  .LBB23_14:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 96(sp)
+; RV64-NEXT:    bnez a0, .LBB23_16
+; RV64-NEXT:  # %bb.15:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 11
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB23_17
+; RV64-NEXT:  .LBB23_16:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 11
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_12
-; RV64-NEXT:  # %bb.11:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_12:
-; RV64-NEXT:    fsd ft1, 88(sp)
+; RV64-NEXT:  .LBB23_17:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 88(sp)
+; RV64-NEXT:    bnez a0, .LBB23_19
+; RV64-NEXT:  # %bb.18:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 10
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB23_20
+; RV64-NEXT:  .LBB23_19:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 10
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_14
-; RV64-NEXT:  # %bb.13:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_14:
-; RV64-NEXT:    fsd ft1, 80(sp)
+; RV64-NEXT:  .LBB23_20:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 80(sp)
+; RV64-NEXT:    bnez a0, .LBB23_22
+; RV64-NEXT:  # %bb.21:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 9
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB23_23
+; RV64-NEXT:  .LBB23_22:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 9
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_16
-; RV64-NEXT:  # %bb.15:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_16:
-; RV64-NEXT:    fsd ft1, 72(sp)
+; RV64-NEXT:  .LBB23_23:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 72(sp)
+; RV64-NEXT:    bnez a0, .LBB23_25
+; RV64-NEXT:  # %bb.24:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 8
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB23_26
+; RV64-NEXT:  .LBB23_25:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 8
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_18
-; RV64-NEXT:  # %bb.17:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_18:
-; RV64-NEXT:    fsd ft1, 64(sp)
+; RV64-NEXT:  .LBB23_26:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 64(sp)
+; RV64-NEXT:    bnez a0, .LBB23_28
+; RV64-NEXT:  # %bb.27:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 7
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB23_29
+; RV64-NEXT:  .LBB23_28:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 7
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_20
-; RV64-NEXT:  # %bb.19:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_20:
-; RV64-NEXT:    fsd ft1, 56(sp)
+; RV64-NEXT:  .LBB23_29:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 56(sp)
+; RV64-NEXT:    bnez a0, .LBB23_31
+; RV64-NEXT:  # %bb.30:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 6
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB23_32
+; RV64-NEXT:  .LBB23_31:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 6
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_22
-; RV64-NEXT:  # %bb.21:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_22:
-; RV64-NEXT:    fsd ft1, 48(sp)
+; RV64-NEXT:  .LBB23_32:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 48(sp)
+; RV64-NEXT:    bnez a0, .LBB23_34
+; RV64-NEXT:  # %bb.33:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 5
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB23_35
+; RV64-NEXT:  .LBB23_34:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 5
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_24
-; RV64-NEXT:  # %bb.23:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_24:
-; RV64-NEXT:    fsd ft1, 40(sp)
+; RV64-NEXT:  .LBB23_35:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 40(sp)
+; RV64-NEXT:    bnez a0, .LBB23_37
+; RV64-NEXT:  # %bb.36:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 4
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB23_38
+; RV64-NEXT:  .LBB23_37:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 4
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_26
-; RV64-NEXT:  # %bb.25:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_26:
-; RV64-NEXT:    fsd ft1, 32(sp)
+; RV64-NEXT:  .LBB23_38:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 32(sp)
+; RV64-NEXT:    bnez a0, .LBB23_40
+; RV64-NEXT:  # %bb.39:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 3
-; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    j .LBB23_41
+; RV64-NEXT:  .LBB23_40:
+; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v8, 3
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_28
-; RV64-NEXT:  # %bb.27:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_28:
-; RV64-NEXT:    fsd ft1, 24(sp)
+; RV64-NEXT:  .LBB23_41:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 24(sp)
+; RV64-NEXT:    bnez a0, .LBB23_43
+; RV64-NEXT:  # %bb.42:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v24, v16, 2
-; RV64-NEXT:    vfmv.f.s ft0, v24
-; RV64-NEXT:    vslidedown.vi v24, v8, 2
-; RV64-NEXT:    vfmv.f.s ft1, v24
-; RV64-NEXT:    bnez a0, .LBB23_30
-; RV64-NEXT:  # %bb.29:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_30:
-; RV64-NEXT:    fsd ft1, 16(sp)
+; RV64-NEXT:    j .LBB23_44
+; RV64-NEXT:  .LBB23_43:
 ; RV64-NEXT:    vsetivli a1, 1, e64,m8,ta,mu
-; RV64-NEXT:    vslidedown.vi v16, v16, 1
-; RV64-NEXT:    vfmv.f.s ft0, v16
+; RV64-NEXT:    vslidedown.vi v24, v8, 2
+; RV64-NEXT:  .LBB23_44:
+; RV64-NEXT:    vfmv.f.s ft0, v24
+; RV64-NEXT:    fsd ft0, 16(sp)
+; RV64-NEXT:    bnez a0, .LBB23_46
+; RV64-NEXT:  # %bb.45:
+; RV64-NEXT:    vsetivli a0, 1, e64,m8,ta,mu
+; RV64-NEXT:    vslidedown.vi v8, v16, 1
+; RV64-NEXT:    j .LBB23_47
+; RV64-NEXT:  .LBB23_46:
+; RV64-NEXT:    vsetivli a0, 1, e64,m8,ta,mu
 ; RV64-NEXT:    vslidedown.vi v8, v8, 1
-; RV64-NEXT:    vfmv.f.s ft1, v8
-; RV64-NEXT:    bnez a0, .LBB23_32
-; RV64-NEXT:  # %bb.31:
-; RV64-NEXT:    fmv.d ft1, ft0
-; RV64-NEXT:  .LBB23_32:
-; RV64-NEXT:    fsd ft1, 8(sp)
+; RV64-NEXT:  .LBB23_47:
+; RV64-NEXT:    vfmv.f.s ft0, v8
+; RV64-NEXT:    fsd ft0, 8(sp)
 ; RV64-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vle64.v v8, (sp)
 ; RV64-NEXT:    addi sp, s0, -256

diff  --git a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll
index fdf82339e5e6d..206c570196e59 100644
--- a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll
@@ -15,8 +15,7 @@ define i64 @test(<vscale x 1 x i64> %0) nounwind {
   ; CHECK: bb.0.entry:
   ; CHECK:   liveins: $v8
   ; CHECK:   [[COPY:%[0-9]+]]:vr = COPY $v8
-  ; CHECK:   dead %2:gpr = PseudoVSETIVLI 1, 88, implicit-def $vl, implicit-def $vtype
-  ; CHECK:   PseudoVSE64_V_M1 [[COPY]], %stack.0.a, 1, 6, implicit $vl, implicit $vtype
+  ; CHECK:   PseudoVSE64_V_M1 [[COPY]], %stack.0.a, 1, 6
   ; CHECK:   [[LD:%[0-9]+]]:gpr = LD %stack.0.a, 0 :: (dereferenceable load 8 from %ir.a)
   ; CHECK:   $x10 = COPY [[LD]]
   ; CHECK:   PseudoRET implicit $x10

diff  --git a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir
index 4621a8bfaba05..42c9c485e7171 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir
@@ -15,20 +15,21 @@ body:             |
     liveins: $v0, $v1, $v2, $v3
     ; CHECK-LABEL: name: mask_reg_alloc
     ; CHECK: liveins: $v0, $v1, $v2, $v3
-    ; CHECK: renamable $v25 = PseudoVMERGE_VIM_M1 killed renamable $v2, 1, killed renamable $v0, $noreg, -1, implicit $vl, implicit $vtype
+    ; CHECK: dead renamable $x10 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype
+    ; CHECK: renamable $v25 = PseudoVMERGE_VIM_M1 killed renamable $v2, 1, killed renamable $v0, 1, 3, implicit $vl, implicit $vtype
     ; CHECK: renamable $v0 = COPY killed renamable $v1
-    ; CHECK: renamable $v26 = PseudoVMERGE_VIM_M1 killed renamable $v3, 1, killed renamable $v0, $noreg, -1, implicit $vl, implicit $vtype
-    ; CHECK: renamable $v0 = PseudoVADD_VV_M1 killed renamable $v25, killed renamable $v26, $noreg, -1, implicit $vl, implicit $vtype
+    ; CHECK: renamable $v26 = PseudoVMERGE_VIM_M1 killed renamable $v3, 1, killed renamable $v0, 1, 3, implicit $vl, implicit $vtype
+    ; CHECK: renamable $v0 = PseudoVADD_VV_M1 killed renamable $v25, killed renamable $v26, 1, 3, implicit $vl, implicit $vtype
     ; CHECK: PseudoRET implicit $v0
     %0:vr = COPY $v0
     %1:vr = COPY $v1
     %2:vr = COPY $v2
     %3:vr = COPY $v3
     %4:vmv0 = COPY %0
-    %5:vrnov0 = PseudoVMERGE_VIM_M1 killed %2, 1, %4, $noreg, -1, implicit $vl, implicit $vtype
+    %5:vrnov0 = PseudoVMERGE_VIM_M1 killed %2, 1, %4, 1, 3
     %6:vmv0 = COPY %1
-    %7:vrnov0 = PseudoVMERGE_VIM_M1 killed %3, 1, %6, $noreg, -1, implicit $vl, implicit $vtype
-    %8:vr = PseudoVADD_VV_M1 killed %5, killed %7, $noreg, -1, implicit $vl, implicit $vtype
+    %7:vrnov0 = PseudoVMERGE_VIM_M1 killed %3, 1, %6, 1, 3
+    %8:vr = PseudoVADD_VV_M1 killed %5, killed %7, 1, 3
     $v0 = COPY %8
     PseudoRET implicit $v0
 ...

diff  --git a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir
index 3b662d325fc24..b971bf6d64f51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir
@@ -52,8 +52,7 @@ body:             |
     ; CHECK: $v0 = COPY [[COPY]]
     ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
     ; CHECK: [[COPY2:%[0-9]+]]:vrm8nov0 = COPY [[DEF]]
-    ; CHECK: dead %5:gpr = PseudoVSETVLI $x0, 91, implicit-def $vl, implicit-def $vtype
-    ; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $noreg, 6, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8)
+    ; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $x0, 6 :: (load 64 from %ir.a, align 8)
     ; CHECK: $v8m8 = COPY [[PseudoVLE64_V_M8_MASK]]
     ; CHECK: PseudoRET implicit $v8m8
     %1:vr = COPY $v0
@@ -61,7 +60,7 @@ body:             |
     $v0 = COPY %1
     %3:vrm8 = IMPLICIT_DEF
     %4:vrm8nov0 = COPY %3
-    %2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 6, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8)
+    %2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 6 :: (load 64 from %ir.a, align 8)
     $v8m8 = COPY %2
     PseudoRET implicit $v8m8
 

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index 26ad89c38b49a..085fb2b964f1b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -13,7 +13,6 @@ define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -28,7 +27,6 @@ define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
@@ -44,7 +42,6 @@ define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -59,7 +56,6 @@ define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
@@ -102,7 +98,6 @@ define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -117,7 +112,6 @@ define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
@@ -160,7 +154,6 @@ define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -175,7 +168,6 @@ define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
@@ -218,7 +210,6 @@ define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.d fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -233,7 +224,6 @@ define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
@@ -249,7 +239,6 @@ define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vfredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v25
 ; CHECK-NEXT:    fadd.d fa0, fa0, ft0
 ; CHECK-NEXT:    ret
@@ -264,7 +253,6 @@ define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-NEXT:    vfmv.v.f v25, fa0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
@@ -282,7 +270,6 @@ define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -298,7 +285,6 @@ define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) #0 {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -314,7 +300,6 @@ define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -332,7 +317,6 @@ define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
@@ -368,7 +352,6 @@ define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m8,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
@@ -386,7 +369,6 @@ define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -402,7 +384,6 @@ define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -418,7 +399,6 @@ define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -452,7 +432,6 @@ define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
@@ -472,7 +451,6 @@ define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
@@ -534,7 +512,6 @@ define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
@@ -552,7 +529,6 @@ define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
@@ -572,7 +548,6 @@ define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
 ; CHECK-NEXT:    vfredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
@@ -590,7 +565,6 @@ define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -606,7 +580,6 @@ define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) #0 {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -622,7 +595,6 @@ define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -640,7 +612,6 @@ define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
@@ -676,7 +647,6 @@ define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m8,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
@@ -694,7 +664,6 @@ define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -710,7 +679,6 @@ define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -726,7 +694,6 @@ define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -760,7 +727,6 @@ define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
@@ -780,7 +746,6 @@ define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
@@ -842,7 +807,6 @@ define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
@@ -860,7 +824,6 @@ define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
@@ -880,7 +843,6 @@ define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-NEXT:    vfmv.v.f v25, ft0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
 ; CHECK-NEXT:    vfredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll
index 46cc6e59d156d..4689df426c7c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll
@@ -10,7 +10,6 @@ define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
@@ -26,7 +25,6 @@ define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
@@ -43,7 +41,6 @@ define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
@@ -59,7 +56,6 @@ define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
@@ -76,7 +72,6 @@ define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
@@ -92,7 +87,6 @@ define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
@@ -108,7 +102,6 @@ define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
@@ -124,7 +117,6 @@ define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
@@ -140,7 +132,6 @@ define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
@@ -156,7 +147,6 @@ define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
@@ -173,7 +163,6 @@ define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
@@ -189,7 +178,6 @@ define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
@@ -206,7 +194,6 @@ define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
@@ -222,7 +209,6 @@ define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
@@ -238,7 +224,6 @@ define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
@@ -254,7 +239,6 @@ define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
@@ -270,7 +254,6 @@ define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
@@ -286,7 +269,6 @@ define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
@@ -303,7 +285,6 @@ define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
@@ -319,7 +300,6 @@ define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
@@ -336,7 +316,6 @@ define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
@@ -352,7 +331,6 @@ define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
@@ -368,7 +346,6 @@ define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
@@ -384,7 +361,6 @@ define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
@@ -400,7 +376,6 @@ define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
@@ -416,7 +391,6 @@ define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
@@ -433,7 +407,6 @@ define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
@@ -449,7 +422,6 @@ define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
@@ -467,7 +439,6 @@ define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
@@ -483,7 +454,6 @@ define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
@@ -499,7 +469,6 @@ define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
@@ -515,7 +484,6 @@ define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
@@ -531,7 +499,6 @@ define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
@@ -547,7 +514,6 @@ define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
@@ -564,7 +530,6 @@ define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
@@ -580,7 +545,6 @@ define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
@@ -598,7 +562,6 @@ define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
@@ -614,7 +577,6 @@ define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
@@ -630,7 +592,6 @@ define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
@@ -646,7 +607,6 @@ define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
@@ -777,7 +737,6 @@ define i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
@@ -793,7 +752,6 @@ define i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
@@ -810,7 +768,6 @@ define i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
@@ -826,7 +783,6 @@ define i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
@@ -844,7 +800,6 @@ define i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
@@ -860,7 +815,6 @@ define i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
@@ -876,7 +830,6 @@ define i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
@@ -892,7 +845,6 @@ define i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
@@ -1023,7 +975,6 @@ define i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
@@ -1039,7 +990,6 @@ define i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
@@ -1056,7 +1006,6 @@ define i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
@@ -1072,7 +1021,6 @@ define i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
@@ -1090,7 +1038,6 @@ define i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
@@ -1106,7 +1053,6 @@ define i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
@@ -1122,7 +1068,6 @@ define i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
@@ -1138,7 +1083,6 @@ define i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
@@ -1314,7 +1258,6 @@ define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1334,7 +1277,6 @@ define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1360,7 +1302,6 @@ define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1381,7 +1322,6 @@ define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1409,7 +1349,6 @@ define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1430,7 +1369,6 @@ define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1450,7 +1388,6 @@ define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1470,7 +1407,6 @@ define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1490,7 +1426,6 @@ define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1510,7 +1445,6 @@ define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1536,7 +1470,6 @@ define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1557,7 +1490,6 @@ define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1585,7 +1517,6 @@ define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1606,7 +1537,6 @@ define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1626,7 +1556,6 @@ define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu
@@ -1646,7 +1575,6 @@ define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    addi a1, zero, 32
 ; CHECK-NEXT:    vsetivli a2, 1, e64,m1,ta,mu

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll
index f5d55a9bd7367..11b5a1d4400f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll
@@ -10,7 +10,6 @@ define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
@@ -26,7 +25,6 @@ define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
@@ -43,7 +41,6 @@ define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
@@ -59,7 +56,6 @@ define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
@@ -76,7 +72,6 @@ define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
@@ -92,7 +87,6 @@ define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
@@ -108,7 +102,6 @@ define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
@@ -124,7 +117,6 @@ define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
@@ -140,7 +132,6 @@ define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
@@ -156,7 +147,6 @@ define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
@@ -173,7 +163,6 @@ define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
@@ -189,7 +178,6 @@ define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
@@ -206,7 +194,6 @@ define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
@@ -222,7 +209,6 @@ define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
@@ -238,7 +224,6 @@ define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
@@ -254,7 +239,6 @@ define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
@@ -270,7 +254,6 @@ define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
@@ -286,7 +269,6 @@ define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
@@ -303,7 +285,6 @@ define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
@@ -319,7 +300,6 @@ define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
@@ -336,7 +316,6 @@ define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
@@ -352,7 +331,6 @@ define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
@@ -368,7 +346,6 @@ define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
@@ -384,7 +361,6 @@ define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
@@ -400,7 +376,6 @@ define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
@@ -416,7 +391,6 @@ define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
@@ -433,7 +407,6 @@ define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
@@ -449,7 +422,6 @@ define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
@@ -467,7 +439,6 @@ define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
@@ -483,7 +454,6 @@ define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
@@ -499,7 +469,6 @@ define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
@@ -515,7 +484,6 @@ define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
@@ -531,7 +499,6 @@ define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
@@ -547,7 +514,6 @@ define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
@@ -564,7 +530,6 @@ define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
@@ -580,7 +545,6 @@ define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
@@ -598,7 +562,6 @@ define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
@@ -614,7 +577,6 @@ define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
@@ -630,7 +592,6 @@ define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
@@ -646,7 +607,6 @@ define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
@@ -777,7 +737,6 @@ define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
@@ -793,7 +752,6 @@ define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
@@ -810,7 +768,6 @@ define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
@@ -826,7 +783,6 @@ define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
@@ -844,7 +800,6 @@ define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
@@ -860,7 +815,6 @@ define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
@@ -876,7 +830,6 @@ define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
@@ -892,7 +845,6 @@ define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
@@ -1023,7 +975,6 @@ define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
@@ -1039,7 +990,6 @@ define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
@@ -1056,7 +1006,6 @@ define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
@@ -1072,7 +1021,6 @@ define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
@@ -1090,7 +1038,6 @@ define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
@@ -1106,7 +1053,6 @@ define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
@@ -1122,7 +1068,6 @@ define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
@@ -1138,7 +1083,6 @@ define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
@@ -1270,7 +1214,6 @@ define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
@@ -1286,7 +1229,6 @@ define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
@@ -1304,7 +1246,6 @@ define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
@@ -1320,7 +1261,6 @@ define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
@@ -1338,7 +1278,6 @@ define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
@@ -1354,7 +1293,6 @@ define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
@@ -1370,7 +1308,6 @@ define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
@@ -1386,7 +1323,6 @@ define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
@@ -1402,7 +1338,6 @@ define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredsum.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
@@ -1418,7 +1353,6 @@ define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
@@ -1436,7 +1370,6 @@ define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredmax.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
@@ -1452,7 +1385,6 @@ define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredminu.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
@@ -1470,7 +1402,6 @@ define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredmin.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
@@ -1486,7 +1417,6 @@ define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, -1
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredand.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
@@ -1502,7 +1432,6 @@ define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
@@ -1518,7 +1447,6 @@ define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:    vmv.v.i v25, 0
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25
-; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
new file mode 100644
index 0000000000000..6f5d0e9c5c616
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -0,0 +1,354 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=riscv64 -mattr=experimental-v \
+# RUN:     -run-pass=riscv-insert-vsetvli | FileCheck %s
+
+--- |
+  ; ModuleID = 'test.ll'
+  source_filename = "test.ll"
+  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+  target triple = "riscv64"
+
+  ; Function Attrs: nounwind
+  define <vscale x 1 x i64> @add(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2) #0 {
+  entry:
+    %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2)
+    ret <vscale x 1 x i64> %a
+  }
+
+  ; Function Attrs: nounwind
+  define <vscale x 1 x i64> @load_add(<vscale x 1 x i64>* %0, <vscale x 1 x i64> %1, i64 %2) #0 {
+  entry:
+    %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* %0, i64 %2)
+    %b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %1, i64 %2)
+    ret <vscale x 1 x i64> %b
+  }
+
+  ; Function Attrs: nounwind
+  define <vscale x 1 x i64> @load_zext(<vscale x 1 x i32>* %0, i64 %1) #0 {
+  entry:
+    %a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* %0, i64 %1)
+    %b = call <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32> %a, i64 %1)
+    ret <vscale x 1 x i64> %b
+  }
+
+  ; Function Attrs: nounwind readnone
+  declare i64 @llvm.riscv.vmv.x.s.nxv1i64(<vscale x 1 x i64>) #1
+
+  ; Function Attrs: nounwind
+  define i64 @vmv_x_s(<vscale x 1 x i64> %0) #0 {
+  entry:
+    %a = call i64 @llvm.riscv.vmv.x.s.nxv1i64(<vscale x 1 x i64> %0)
+    ret i64 %a
+  }
+
+  define void @add_v2i64(<2 x i64>* %x, <2 x i64>* %y) #2 {
+    %a = load <2 x i64>, <2 x i64>* %x, align 16
+    %b = load <2 x i64>, <2 x i64>* %y, align 16
+    %c = add <2 x i64> %a, %b
+    store <2 x i64> %c, <2 x i64>* %x, align 16
+    ret void
+  }
+
+  ; Function Attrs: nofree nosync nounwind readnone willreturn
+  declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) #3
+
+  define i64 @vreduce_add_v2i64(<2 x i64>* %x) #2 {
+    %v = load <2 x i64>, <2 x i64>* %x, align 16
+    %red = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %v)
+    ret i64 %red
+  }
+
+  ; Function Attrs: nounwind
+  declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #0
+
+  ; Function Attrs: nounwind
+  define <vscale x 1 x i64> @vsetvli_add(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %avl) #0 {
+  entry:
+    %a = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 1)
+    %b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %a)
+    ret <vscale x 1 x i64> %b
+  }
+
+  ; Function Attrs: nounwind readnone
+  declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
+
+  ; Function Attrs: nounwind readonly
+  declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* nocapture, i64) #2
+
+  ; Function Attrs: nounwind readonly
+  declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* nocapture, i64) #2
+
+  ; Function Attrs: nounwind readnone
+  declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32>, i64) #1
+
+  attributes #0 = { nounwind "target-features"="+experimental-v" }
+  attributes #1 = { nounwind readnone "target-features"="+experimental-v" }
+  attributes #2 = { "target-features"="+experimental-v" }
+  attributes #3 = { nofree nosync nounwind readnone willreturn "target-features"="+experimental-v" }
+  attributes #4 = { nounwind readonly "target-features"="+experimental-v" }
+
+...
+---
+name:            add
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vr }
+  - { id: 1, class: vr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: vr }
+liveins:
+  - { reg: '$v8', virtual-reg: '%0' }
+  - { reg: '$v9', virtual-reg: '%1' }
+  - { reg: '$x10', virtual-reg: '%2' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $v8, $v9, $x10
+
+    ; CHECK-LABEL: name: add
+    ; CHECK: liveins: $v8, $v9, $x10
+    ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9
+    ; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8
+    ; CHECK: dead %4:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
+    ; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]]
+    ; CHECK: PseudoRET implicit $v8
+    %2:gpr = COPY $x10
+    %1:vr = COPY $v9
+    %0:vr = COPY $v8
+    %3:vr = PseudoVADD_VV_M1 %0, %1, %2, 6
+    $v8 = COPY %3
+    PseudoRET implicit $v8
+
+...
+---
+name:            load_add
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: vr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: vr }
+  - { id: 4, class: vr }
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$v8', virtual-reg: '%1' }
+  - { reg: '$x11', virtual-reg: '%2' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $x10, $v8, $x11
+
+    ; CHECK-LABEL: name: load_add
+    ; CHECK: liveins: $x10, $v8, $x11
+    ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v8
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK: dead %5:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY2]], $noreg, 6, implicit $vl, implicit $vtype
+    ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
+    ; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]]
+    ; CHECK: PseudoRET implicit $v8
+    %2:gpr = COPY $x11
+    %1:vr = COPY $v8
+    %0:gpr = COPY $x10
+    %3:vr = PseudoVLE64_V_M1 %0, %2, 6
+    %4:vr = PseudoVADD_VV_M1 killed %3, %1, %2, 6
+    $v8 = COPY %4
+    PseudoRET implicit $v8
+
+...
+---
+name:            load_zext
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: vr }
+  - { id: 3, class: vr }
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$x11', virtual-reg: '%1' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: load_zext
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK: dead %4:gpr = PseudoVSETVLI [[COPY]], 87, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 [[COPY1]], $noreg, 5, implicit $vl, implicit $vtype
+    ; CHECK: dead %5:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
+    ; CHECK: early-clobber %3:vr = PseudoVZEXT_VF2_M1 killed [[PseudoVLE32_V_MF2_]], $noreg, 6, implicit $vl, implicit $vtype
+    ; CHECK: $v8 = COPY %3
+    ; CHECK: PseudoRET implicit $v8
+    %1:gpr = COPY $x11
+    %0:gpr = COPY $x10
+    %2:vr = PseudoVLE32_V_MF2 %0, %1, 5
+    early-clobber %3:vr = PseudoVZEXT_VF2_M1 killed %2, %1, 6
+    $v8 = COPY %3
+    PseudoRET implicit $v8
+
+...
+---
+name:            vmv_x_s
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vr }
+  - { id: 1, class: gpr }
+liveins:
+  - { reg: '$v8', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $v8
+
+    ; CHECK-LABEL: name: vmv_x_s
+    ; CHECK: liveins: $v8
+    ; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v8
+    ; CHECK: dead $x0 = PseudoVSETVLI killed $x0, 88, implicit-def $vl, implicit-def $vtype, implicit $vl
+    ; CHECK: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 [[COPY]], 6, implicit $vtype
+    ; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]]
+    ; CHECK: PseudoRET implicit $x10
+    %0:vr = COPY $v8
+    %1:gpr = PseudoVMV_X_S_M1 %0, 6
+    $x10 = COPY %1
+    PseudoRET implicit $x10
+
+...
+---
+name:            add_v2i64
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: vr }
+  - { id: 3, class: vr }
+  - { id: 4, class: vr }
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$x11', virtual-reg: '%1' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: add_v2i64
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK: dead %5:gpr = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY1]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.x)
+    ; CHECK: [[PseudoVLE64_V_M1_1:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.y)
+    ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], killed [[PseudoVLE64_V_M1_1]], 2, 6, implicit $vl, implicit $vtype
+    ; CHECK: PseudoVSE64_V_M1 killed [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6, implicit $vl, implicit $vtype :: (store 16 into %ir.x)
+    ; CHECK: PseudoRET
+    %1:gpr = COPY $x11
+    %0:gpr = COPY $x10
+    %2:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load 16 from %ir.x)
+    %3:vr = PseudoVLE64_V_M1 %1, 2, 6 :: (load 16 from %ir.y)
+    %4:vr = PseudoVADD_VV_M1 killed %2, killed %3, 2, 6
+    PseudoVSE64_V_M1 killed %4, %0, 2, 6 :: (store 16 into %ir.x)
+    PseudoRET
+
+...
+---
+name:            vreduce_add_v2i64
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: vr }
+  - { id: 2, class: vr }
+  - { id: 3, class: vr }
+  - { id: 4, class: vr }
+  - { id: 5, class: gpr }
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: $x10
+
+    ; CHECK-LABEL: name: vreduce_add_v2i64
+    ; CHECK: liveins: $x10
+    ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK: dead %6:gpr = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.x)
+    ; CHECK: dead %7:gpr = PseudoVSETVLI $x0, 88, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 0, $noreg, 6, implicit $vl, implicit $vtype
+    ; CHECK: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+    ; CHECK: dead %8:gpr = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVREDSUM_VS_M1_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1 [[DEF]], killed [[PseudoVLE64_V_M1_]], killed [[PseudoVMV_V_I_M1_]], 2, 6, implicit $vl, implicit $vtype
+    ; CHECK: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 killed [[PseudoVREDSUM_VS_M1_]], 6, implicit $vtype
+    ; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]]
+    ; CHECK: PseudoRET implicit $x10
+    %0:gpr = COPY $x10
+    %1:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load 16 from %ir.x)
+    %2:vr = PseudoVMV_V_I_M1 0, $x0, 6
+    %4:vr = IMPLICIT_DEF
+    %3:vr = PseudoVREDSUM_VS_M1 %4, killed %1, killed %2, 2, 6
+    %5:gpr = PseudoVMV_X_S_M1 killed %3, 6
+    $x10 = COPY %5
+    PseudoRET implicit $x10
+
+...
+---
+name:            vsetvli_add
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vr }
+  - { id: 1, class: vr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+  - { id: 4, class: vr }
+liveins:
+  - { reg: '$v8', virtual-reg: '%0' }
+  - { reg: '$v9', virtual-reg: '%1' }
+  - { reg: '$x10', virtual-reg: '%2' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $v8, $v9, $x10
+
+    ; CHECK-LABEL: name: vsetvli_add
+    ; CHECK: liveins: $v8, $v9, $x10
+    ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9
+    ; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8
+    ; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
+    ; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]]
+    ; CHECK: PseudoRET implicit $v8
+    %2:gpr = COPY $x10
+    %1:vr = COPY $v9
+    %0:vr = COPY $v8
+    %3:gpr = PseudoVSETVLI %2, 88, implicit-def dead $vl, implicit-def dead $vtype
+    %4:vr = PseudoVADD_VV_M1 %0, %1, killed %3, 6
+    $v8 = COPY %4
+    PseudoRET implicit $v8
+
+...

diff  --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
index 40ea93c872440..44bc37d0e1ac8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=riscv64 -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
+# RUN: llc -march=riscv64 -mattr=+experimental-v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
 
 --- |
   target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
@@ -41,7 +41,7 @@ body: |
     ; CHECK: PseudoRET
     %0:gpr = COPY $x10
     %1:gpr = COPY $x11
-    $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 %0, %1, 6, implicit $vl, implicit $vtype
+    $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 %0, %1, 6
     PseudoVSPILL7_M1 killed renamable $v0_v1_v2_v3_v4_v5_v6, %stack.0, $x0
     renamable $v7_v8_v9_v10_v11_v12_v13 = PseudoVRELOAD7_M1 %stack.0, $x0
     VS1R_V killed $v8, %0:gpr
