[llvm] 321a71a - [RISCV] Optimize BUILD_VECTOR sequences that reveal hidden splats

Fraser Cormack via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 25 03:42:13 PDT 2021


Author: Fraser Cormack
Date: 2021-03-25T10:35:31Z
New Revision: 321a71a77268c314c769a98d62c14609aff306e0

URL: https://github.com/llvm/llvm-project/commit/321a71a77268c314c769a98d62c14609aff306e0
DIFF: https://github.com/llvm/llvm-project/commit/321a71a77268c314c769a98d62c14609aff306e0.diff

LOG: [RISCV] Optimize BUILD_VECTOR sequences that reveal hidden splats

This patch adds a further optimization technique to RVV BUILD_VECTOR
lowering. It teaches the compiler to find splats of larger vector
element types "hidden" in smaller ones. For example, the v4i8
build_vector (0x1, 0x2, 0x1, 0x2) can instead be splatted as the v2i16
value 0x0201. This is generally cheaper than the dominant-element
BUILD_VECTOR lowering and so takes priority.
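
The packing step itself is a little-endian shift-and-or over the
repeated sequence; as a minimal standalone sketch (illustrative only,
not the code added by this patch):

  // Pack a repeated constant sequence into one wider splat value, with
  // element 0 landing in the low bits (little-endian element order).
  // Assumes Seq.size() * EltBits <= 64, as the lowering checks.
  #include <cassert>
  #include <cstdint>
  #include <vector>

  static uint64_t packSequence(const std::vector<uint64_t> &Seq,
                               unsigned EltBits) {
    uint64_t Mask = EltBits == 64 ? ~0ULL : ((1ULL << EltBits) - 1);
    uint64_t Splat = 0;
    for (unsigned I = 0; I < Seq.size(); ++I)
      Splat |= (Seq[I] & Mask) << (I * EltBits);
    return Splat;
  }

  int main() {
    // The repeated sequence (0x1, 0x2) of the v4i8 example above packs
    // into the i16 splat value 0x0201.
    assert(packSequence({0x1, 0x2}, 8) == 0x0201);
  }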

This optimization is currently limited to all-constant-or-undef
BUILD_VECTORs, as those were found to be the most common. There's no
reason this couldn't be extended to other BUILD_VECTORs, but the
additional bit-manipulation instructions needed to assemble the splat
value at run time may require more sophisticated heuristics.

There are some cases where materializing the larger constant takes
more scalar instructions than building the vector with vector
instructions would. We could add heuristics to catch these cases.
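
One possible shape for such a heuristic, sketched purely for
illustration (this patch does not implement it), is to estimate the
scalar cost of the candidate splat constant and prefer the hidden
splat only when that cost is small:

  // Rough estimate of how many scalar instructions RISC-V needs to
  // materialize a sign-extended 32-bit constant: one addi if it fits a
  // signed 12-bit immediate, otherwise lui plus an addi when the low
  // 12 bits are non-zero. This is an approximation for illustration,
  // not the backend's exact materialization algorithm.
  #include <cstdint>
  #include <cstdio>

  static unsigned estimateScalarMatCost(int32_t Val) {
    if (Val >= -2048 && Val < 2048)
      return 1;                      // addi rd, zero, imm12
    return (Val & 0xFFF) == 0 ? 1    // lui alone
                              : 2;   // lui + addi
  }

  int main() {
    printf("%u\n", estimateScalarMatCost(0x0201));     // 1: the v2i16 splat above
    printf("%u\n", estimateScalarMatCost(0x00010001)); // 2: needs lui + addi
  }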

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D99195

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b10564fcf760..530f6458fa0b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1132,6 +1132,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   SDValue Mask, VL;
   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
 
+  MVT XLenVT = Subtarget.getXLenVT();
   unsigned NumElts = Op.getNumOperands();
 
   if (VT.getVectorElementType() == MVT::i1) {
@@ -1167,7 +1168,6 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
 
       uint64_t Bits = 0;
       unsigned BitPos = 0, IntegerEltIdx = 0;
-      MVT XLenVT = Subtarget.getXLenVT();
       SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
 
       for (unsigned I = 0; I < NumElts; I++, BitPos++) {
@@ -1239,6 +1239,64 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     }
   }
 
+  // Attempt to detect "hidden" splats, which only reveal themselves as splats
+  // when re-interpreted as a vector with a larger element type. For example,
+  //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
+  // could be instead splat as
+  //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
+  // TODO: This optimization could also work on non-constant splats, but it
+  // would require bit-manipulation instructions to construct the splat value.
+  SmallVector<SDValue> Sequence;
+  unsigned EltBitSize = VT.getScalarSizeInBits();
+  const auto *BV = cast<BuildVectorSDNode>(Op);
+  if (VT.isInteger() && EltBitSize < 64 &&
+      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
+      BV->getRepeatedSequence(Sequence) &&
+      (Sequence.size() * EltBitSize) <= 64) {
+    unsigned SeqLen = Sequence.size();
+    MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
+    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
+    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
+            ViaIntVT == MVT::i64) &&
+           "Unexpected sequence type");
+
+    unsigned EltIdx = 0;
+    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
+    uint64_t SplatValue = 0;
+    // Construct the amalgamated value which can be splatted as this larger
+    // vector type.
+    for (const auto &SeqV : Sequence) {
+      if (!SeqV.isUndef())
+        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
+                       << (EltIdx * EltBitSize));
+      EltIdx++;
+    }
+
+    // On RV64, sign-extend from 32 to 64 bits where possible in order to
+    // achieve better constant materialization.
+    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
+      SplatValue = SignExtend64(SplatValue, 32);
+
+    // Since we can't introduce illegal i64 types at this stage, we can only
+    // perform an i64 splat on RV32 if it is its own sign-extended value. That
+    // way we can use RVV instructions to splat.
+    assert((ViaIntVT.bitsLE(XLenVT) ||
+            (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
+           "Unexpected bitcast sequence");
+    if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
+      SDValue ViaVL =
+          DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
+      MVT ViaContainerVT =
+          RISCVTargetLowering::getContainerForFixedLengthVector(DAG, ViaVecVT,
+                                                                Subtarget);
+      SDValue Splat =
+          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
+                      DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
+      Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
+      return DAG.getBitcast(VT, Splat);
+    }
+  }
+
   // Try and optimize BUILD_VECTORs with "dominant values" - these are values
   // which constitute a large proportion of the elements. In such cases we can
   // splat a vector with the dominant element and make up the shortfall with
@@ -1270,7 +1328,6 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   }
 
   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
-  MVT XLenVT = Subtarget.getXLenVT();
   unsigned NumDefElts = NumElts - NumUndefElts;
   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
 

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
index 8a2b439d0186..b56fe797f5c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -368,127 +368,99 @@ define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV32-LABEL: bitreverse_v2i64:
 ; LMULMAX2-RV32:       # %bb.0:
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vle64.v v26, (a0)
-; LMULMAX2-RV32-NEXT:    addi a1, zero, 5
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.i v25, 0
-; LMULMAX2-RV32-NEXT:    addi a1, zero, 24
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v27, v25, a1, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v28, v26, v27
-; LMULMAX2-RV32-NEXT:    lui a1, 4080
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v29, v25, a1, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v28, v28, v29
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v29, v25, 8, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v30, v26, v29
-; LMULMAX2-RV32-NEXT:    lui a2, 1044480
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v31, v25, a2, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v30, v30, v31
-; LMULMAX2-RV32-NEXT:    vor.vv v28, v30, v28
+; LMULMAX2-RV32-NEXT:    vle64.v v25, (a0)
+; LMULMAX2-RV32-NEXT:    addi a6, zero, 56
+; LMULMAX2-RV32-NEXT:    vsrl.vx v26, v25, a6
 ; LMULMAX2-RV32-NEXT:    addi a2, zero, 40
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v30, v25, a2, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v31, v26, v30
-; LMULMAX2-RV32-NEXT:    lui a2, 16
-; LMULMAX2-RV32-NEXT:    addi a2, a2, -256
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v8, v25, a2, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v31, v31, v8
-; LMULMAX2-RV32-NEXT:    addi a3, zero, 56
-; LMULMAX2-RV32-NEXT:    vsetivli a4, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v8, v25, a3, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v9, v26, v8
-; LMULMAX2-RV32-NEXT:    vor.vv v31, v31, v9
-; LMULMAX2-RV32-NEXT:    vor.vv v28, v28, v31
-; LMULMAX2-RV32-NEXT:    vsll.vv v29, v26, v29
-; LMULMAX2-RV32-NEXT:    addi a3, zero, 255
+; LMULMAX2-RV32-NEXT:    vsrl.vx v27, v25, a2
+; LMULMAX2-RV32-NEXT:    lui a3, 16
+; LMULMAX2-RV32-NEXT:    addi a3, a3, -256
+; LMULMAX2-RV32-NEXT:    vand.vx v27, v27, a3
+; LMULMAX2-RV32-NEXT:    vor.vv v26, v27, v26
+; LMULMAX2-RV32-NEXT:    addi a4, zero, 5
+; LMULMAX2-RV32-NEXT:    vsetivli a5, 1, e8,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a4
 ; LMULMAX2-RV32-NEXT:    vsetivli a4, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v31, a3
-; LMULMAX2-RV32-NEXT:    vmerge.vim v31, v31, 0, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v29, v29, v31
-; LMULMAX2-RV32-NEXT:    vsll.vv v27, v26, v27
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v31, a2
-; LMULMAX2-RV32-NEXT:    vmerge.vim v31, v31, 0, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v27, v27, v31
-; LMULMAX2-RV32-NEXT:    vor.vv v27, v27, v29
-; LMULMAX2-RV32-NEXT:    vsll.vv v29, v26, v30
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v30, a1
-; LMULMAX2-RV32-NEXT:    vmerge.vim v30, v30, 0, v0
+; LMULMAX2-RV32-NEXT:    vmv.v.i v27, 0
+; LMULMAX2-RV32-NEXT:    lui a4, 1044480
+; LMULMAX2-RV32-NEXT:    vmerge.vxm v27, v27, a4, v0
+; LMULMAX2-RV32-NEXT:    vsetivli a4, 2, e64,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vsrl.vi v28, v25, 8
+; LMULMAX2-RV32-NEXT:    vand.vv v27, v28, v27
+; LMULMAX2-RV32-NEXT:    vsrl.vi v28, v25, 24
+; LMULMAX2-RV32-NEXT:    lui a4, 4080
+; LMULMAX2-RV32-NEXT:    vand.vx v28, v28, a4
+; LMULMAX2-RV32-NEXT:    vor.vv v27, v27, v28
+; LMULMAX2-RV32-NEXT:    vor.vv v26, v27, v26
+; LMULMAX2-RV32-NEXT:    addi a5, zero, 255
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.v.x v27, a5
+; LMULMAX2-RV32-NEXT:    vmerge.vim v27, v27, 0, v0
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v29, v29, v30
-; LMULMAX2-RV32-NEXT:    vsll.vv v26, v26, v8
-; LMULMAX2-RV32-NEXT:    vor.vv v26, v26, v29
-; LMULMAX2-RV32-NEXT:    vor.vv v26, v26, v27
-; LMULMAX2-RV32-NEXT:    vor.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT:    vsll.vi v28, v25, 8
+; LMULMAX2-RV32-NEXT:    vand.vv v27, v28, v27
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.v.x v28, a3
+; LMULMAX2-RV32-NEXT:    vmerge.vim v28, v28, 0, v0
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vsll.vi v29, v25, 24
+; LMULMAX2-RV32-NEXT:    vand.vv v28, v29, v28
+; LMULMAX2-RV32-NEXT:    vor.vv v27, v28, v27
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.v.x v28, a4
+; LMULMAX2-RV32-NEXT:    vmerge.vim v28, v28, 0, v0
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vsll.vx v29, v25, a2
+; LMULMAX2-RV32-NEXT:    vand.vv v28, v29, v28
+; LMULMAX2-RV32-NEXT:    vsll.vx v25, v25, a6
+; LMULMAX2-RV32-NEXT:    vor.vv v25, v25, v28
+; LMULMAX2-RV32-NEXT:    vor.vv v25, v25, v27
+; LMULMAX2-RV32-NEXT:    vor.vv v25, v25, v26
 ; LMULMAX2-RV32-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v27, a1
+; LMULMAX2-RV32-NEXT:    vmv.v.x v26, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v27, v26, v27
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v28, v25, 4, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsll.vv v27, v27, v28
+; LMULMAX2-RV32-NEXT:    vand.vv v26, v25, v26
+; LMULMAX2-RV32-NEXT:    vsll.vi v26, v26, 4
 ; LMULMAX2-RV32-NEXT:    lui a1, 986895
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 240
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v29, a1
+; LMULMAX2-RV32-NEXT:    vmv.v.x v27, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v26, v26, v29
-; LMULMAX2-RV32-NEXT:    vsrl.vv v26, v26, v28
-; LMULMAX2-RV32-NEXT:    vor.vv v26, v26, v27
+; LMULMAX2-RV32-NEXT:    vand.vv v25, v25, v27
+; LMULMAX2-RV32-NEXT:    vsrl.vi v25, v25, 4
+; LMULMAX2-RV32-NEXT:    vor.vv v25, v25, v26
 ; LMULMAX2-RV32-NEXT:    lui a1, 209715
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v27, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v27, v26, v27
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v28, v25, 2, v0
+; LMULMAX2-RV32-NEXT:    vmv.v.x v26, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsll.vv v27, v27, v28
+; LMULMAX2-RV32-NEXT:    vand.vv v26, v25, v26
+; LMULMAX2-RV32-NEXT:    vsll.vi v26, v26, 2
 ; LMULMAX2-RV32-NEXT:    lui a1, 838861
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, -820
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v29, a1
+; LMULMAX2-RV32-NEXT:    vmv.v.x v27, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v26, v26, v29
-; LMULMAX2-RV32-NEXT:    vsrl.vv v26, v26, v28
-; LMULMAX2-RV32-NEXT:    vor.vv v26, v26, v27
+; LMULMAX2-RV32-NEXT:    vand.vv v25, v25, v27
+; LMULMAX2-RV32-NEXT:    vsrl.vi v25, v25, 2
+; LMULMAX2-RV32-NEXT:    vor.vv v25, v25, v26
 ; LMULMAX2-RV32-NEXT:    lui a1, 349525
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v27, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v27, v26, v27
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v25, v25, 1, v0
+; LMULMAX2-RV32-NEXT:    vmv.v.x v26, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsll.vv v27, v27, v25
+; LMULMAX2-RV32-NEXT:    vand.vv v26, v25, v26
+; LMULMAX2-RV32-NEXT:    vsll.vi v26, v26, 1
 ; LMULMAX2-RV32-NEXT:    lui a1, 699051
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, -1366
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v28, a1
+; LMULMAX2-RV32-NEXT:    vmv.v.x v27, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v26, v26, v28
-; LMULMAX2-RV32-NEXT:    vsrl.vv v25, v26, v25
-; LMULMAX2-RV32-NEXT:    vor.vv v25, v25, v27
+; LMULMAX2-RV32-NEXT:    vand.vv v25, v25, v27
+; LMULMAX2-RV32-NEXT:    vsrl.vi v25, v25, 1
+; LMULMAX2-RV32-NEXT:    vor.vv v25, v25, v26
 ; LMULMAX2-RV32-NEXT:    vse64.v v25, (a0)
 ; LMULMAX2-RV32-NEXT:    ret
 ;
@@ -596,127 +568,99 @@ define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV32-LABEL: bitreverse_v2i64:
 ; LMULMAX1-RV32:       # %bb.0:
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vle64.v v26, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, zero, 5
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a1
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.i v25, 0
-; LMULMAX1-RV32-NEXT:    addi a1, zero, 24
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v27, v25, a1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v28, v26, v27
-; LMULMAX1-RV32-NEXT:    lui a1, 4080
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v29, v25, a1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v28, v28, v29
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vim v29, v25, 8, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v30, v26, v29
-; LMULMAX1-RV32-NEXT:    lui a2, 1044480
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v31, v25, a2, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v30, v30, v31
-; LMULMAX1-RV32-NEXT:    vor.vv v28, v30, v28
+; LMULMAX1-RV32-NEXT:    vle64.v v25, (a0)
+; LMULMAX1-RV32-NEXT:    addi a6, zero, 56
+; LMULMAX1-RV32-NEXT:    vsrl.vx v26, v25, a6
 ; LMULMAX1-RV32-NEXT:    addi a2, zero, 40
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v30, v25, a2, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v31, v26, v30
-; LMULMAX1-RV32-NEXT:    lui a2, 16
-; LMULMAX1-RV32-NEXT:    addi a2, a2, -256
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v8, v25, a2, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v31, v31, v8
-; LMULMAX1-RV32-NEXT:    addi a3, zero, 56
-; LMULMAX1-RV32-NEXT:    vsetivli a4, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v8, v25, a3, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v26, v8
-; LMULMAX1-RV32-NEXT:    vor.vv v31, v31, v9
-; LMULMAX1-RV32-NEXT:    vor.vv v28, v28, v31
-; LMULMAX1-RV32-NEXT:    vsll.vv v29, v26, v29
-; LMULMAX1-RV32-NEXT:    addi a3, zero, 255
+; LMULMAX1-RV32-NEXT:    vsrl.vx v27, v25, a2
+; LMULMAX1-RV32-NEXT:    lui a3, 16
+; LMULMAX1-RV32-NEXT:    addi a3, a3, -256
+; LMULMAX1-RV32-NEXT:    vand.vx v27, v27, a3
+; LMULMAX1-RV32-NEXT:    vor.vv v26, v27, v26
+; LMULMAX1-RV32-NEXT:    addi a4, zero, 5
+; LMULMAX1-RV32-NEXT:    vsetivli a5, 1, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a4
 ; LMULMAX1-RV32-NEXT:    vsetivli a4, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v31, a3
-; LMULMAX1-RV32-NEXT:    vmerge.vim v31, v31, 0, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v29, v29, v31
-; LMULMAX1-RV32-NEXT:    vsll.vv v27, v26, v27
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v31, a2
-; LMULMAX1-RV32-NEXT:    vmerge.vim v31, v31, 0, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v27, v27, v31
-; LMULMAX1-RV32-NEXT:    vor.vv v27, v27, v29
-; LMULMAX1-RV32-NEXT:    vsll.vv v29, v26, v30
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v30, a1
-; LMULMAX1-RV32-NEXT:    vmerge.vim v30, v30, 0, v0
+; LMULMAX1-RV32-NEXT:    vmv.v.i v27, 0
+; LMULMAX1-RV32-NEXT:    lui a4, 1044480
+; LMULMAX1-RV32-NEXT:    vmerge.vxm v27, v27, a4, v0
+; LMULMAX1-RV32-NEXT:    vsetivli a4, 2, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vsrl.vi v28, v25, 8
+; LMULMAX1-RV32-NEXT:    vand.vv v27, v28, v27
+; LMULMAX1-RV32-NEXT:    vsrl.vi v28, v25, 24
+; LMULMAX1-RV32-NEXT:    lui a4, 4080
+; LMULMAX1-RV32-NEXT:    vand.vx v28, v28, a4
+; LMULMAX1-RV32-NEXT:    vor.vv v27, v27, v28
+; LMULMAX1-RV32-NEXT:    vor.vv v26, v27, v26
+; LMULMAX1-RV32-NEXT:    addi a5, zero, 255
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.v.x v27, a5
+; LMULMAX1-RV32-NEXT:    vmerge.vim v27, v27, 0, v0
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v29, v29, v30
-; LMULMAX1-RV32-NEXT:    vsll.vv v26, v26, v8
-; LMULMAX1-RV32-NEXT:    vor.vv v26, v26, v29
-; LMULMAX1-RV32-NEXT:    vor.vv v26, v26, v27
-; LMULMAX1-RV32-NEXT:    vor.vv v26, v26, v28
+; LMULMAX1-RV32-NEXT:    vsll.vi v28, v25, 8
+; LMULMAX1-RV32-NEXT:    vand.vv v27, v28, v27
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.v.x v28, a3
+; LMULMAX1-RV32-NEXT:    vmerge.vim v28, v28, 0, v0
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vsll.vi v29, v25, 24
+; LMULMAX1-RV32-NEXT:    vand.vv v28, v29, v28
+; LMULMAX1-RV32-NEXT:    vor.vv v27, v28, v27
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.v.x v28, a4
+; LMULMAX1-RV32-NEXT:    vmerge.vim v28, v28, 0, v0
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vsll.vx v29, v25, a2
+; LMULMAX1-RV32-NEXT:    vand.vv v28, v29, v28
+; LMULMAX1-RV32-NEXT:    vsll.vx v25, v25, a6
+; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v26
 ; LMULMAX1-RV32-NEXT:    lui a1, 61681
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, -241
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v27, a1
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v27, v26, v27
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vim v28, v25, 4, v0
+; LMULMAX1-RV32-NEXT:    vmv.v.x v26, a1
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsll.vv v27, v27, v28
+; LMULMAX1-RV32-NEXT:    vand.vv v26, v25, v26
+; LMULMAX1-RV32-NEXT:    vsll.vi v26, v26, 4
 ; LMULMAX1-RV32-NEXT:    lui a1, 986895
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, 240
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v29, a1
+; LMULMAX1-RV32-NEXT:    vmv.v.x v27, a1
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v26, v26, v29
-; LMULMAX1-RV32-NEXT:    vsrl.vv v26, v26, v28
-; LMULMAX1-RV32-NEXT:    vor.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vsrl.vi v25, v25, 4
+; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v26
 ; LMULMAX1-RV32-NEXT:    lui a1, 209715
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, 819
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v27, a1
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v27, v26, v27
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vim v28, v25, 2, v0
+; LMULMAX1-RV32-NEXT:    vmv.v.x v26, a1
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsll.vv v27, v27, v28
+; LMULMAX1-RV32-NEXT:    vand.vv v26, v25, v26
+; LMULMAX1-RV32-NEXT:    vsll.vi v26, v26, 2
 ; LMULMAX1-RV32-NEXT:    lui a1, 838861
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, -820
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v29, a1
+; LMULMAX1-RV32-NEXT:    vmv.v.x v27, a1
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v26, v26, v29
-; LMULMAX1-RV32-NEXT:    vsrl.vv v26, v26, v28
-; LMULMAX1-RV32-NEXT:    vor.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vsrl.vi v25, v25, 2
+; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v26
 ; LMULMAX1-RV32-NEXT:    lui a1, 349525
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, 1365
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v27, a1
+; LMULMAX1-RV32-NEXT:    vmv.v.x v26, a1
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v27, v26, v27
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vim v25, v25, 1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsll.vv v27, v27, v25
+; LMULMAX1-RV32-NEXT:    vand.vv v26, v25, v26
+; LMULMAX1-RV32-NEXT:    vsll.vi v26, v26, 1
 ; LMULMAX1-RV32-NEXT:    lui a1, 699051
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, -1366
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v28, a1
+; LMULMAX1-RV32-NEXT:    vmv.v.x v27, a1
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v26, v26, v28
-; LMULMAX1-RV32-NEXT:    vsrl.vv v25, v26, v25
-; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vsrl.vi v25, v25, 1
+; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v26
 ; LMULMAX1-RV32-NEXT:    vse64.v v25, (a0)
 ; LMULMAX1-RV32-NEXT:    ret
 ;
@@ -1288,127 +1232,99 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-LABEL: bitreverse_v4i64:
 ; LMULMAX2-RV32:       # %bb.0:
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vle64.v v28, (a0)
-; LMULMAX2-RV32-NEXT:    addi a1, zero, 85
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.i v26, 0
-; LMULMAX2-RV32-NEXT:    addi a1, zero, 24
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v30, v26, a1, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v8, v28, v30
-; LMULMAX2-RV32-NEXT:    lui a1, 4080
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v10, v26, a1, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v10, v26, 8, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v12, v28, v10
-; LMULMAX2-RV32-NEXT:    lui a2, 1044480
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v14, v26, a2, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v12, v12, v14
-; LMULMAX2-RV32-NEXT:    vor.vv v12, v12, v8
+; LMULMAX2-RV32-NEXT:    vle64.v v26, (a0)
+; LMULMAX2-RV32-NEXT:    addi a6, zero, 56
+; LMULMAX2-RV32-NEXT:    vsrl.vx v28, v26, a6
 ; LMULMAX2-RV32-NEXT:    addi a2, zero, 40
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v8, v26, a2, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v14, v28, v8
-; LMULMAX2-RV32-NEXT:    lui a2, 16
-; LMULMAX2-RV32-NEXT:    addi a2, a2, -256
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v16, v26, a2, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v14, v14, v16
-; LMULMAX2-RV32-NEXT:    addi a3, zero, 56
-; LMULMAX2-RV32-NEXT:    vsetivli a4, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v16, v26, a3, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v18, v28, v16
-; LMULMAX2-RV32-NEXT:    vor.vv v14, v14, v18
-; LMULMAX2-RV32-NEXT:    vor.vv v12, v12, v14
-; LMULMAX2-RV32-NEXT:    vsll.vv v10, v28, v10
-; LMULMAX2-RV32-NEXT:    addi a3, zero, 255
+; LMULMAX2-RV32-NEXT:    vsrl.vx v30, v26, a2
+; LMULMAX2-RV32-NEXT:    lui a3, 16
+; LMULMAX2-RV32-NEXT:    addi a3, a3, -256
+; LMULMAX2-RV32-NEXT:    vand.vx v30, v30, a3
+; LMULMAX2-RV32-NEXT:    vor.vv v28, v30, v28
+; LMULMAX2-RV32-NEXT:    addi a4, zero, 85
+; LMULMAX2-RV32-NEXT:    vsetivli a5, 1, e8,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a4
 ; LMULMAX2-RV32-NEXT:    vsetivli a4, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v14, a3
-; LMULMAX2-RV32-NEXT:    vmerge.vim v14, v14, 0, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v10, v10, v14
-; LMULMAX2-RV32-NEXT:    vsll.vv v30, v28, v30
-; LMULMAX2-RV32-NEXT:    vsetivli a3, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v14, a2
-; LMULMAX2-RV32-NEXT:    vmerge.vim v14, v14, 0, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v30, v30, v14
-; LMULMAX2-RV32-NEXT:    vor.vv v30, v30, v10
-; LMULMAX2-RV32-NEXT:    vsll.vv v8, v28, v8
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT:    vmerge.vim v10, v10, 0, v0
+; LMULMAX2-RV32-NEXT:    vmv.v.i v30, 0
+; LMULMAX2-RV32-NEXT:    lui a4, 1044480
+; LMULMAX2-RV32-NEXT:    vmerge.vxm v30, v30, a4, v0
+; LMULMAX2-RV32-NEXT:    vsetivli a4, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v26, 8
+; LMULMAX2-RV32-NEXT:    vand.vv v30, v8, v30
+; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v26, 24
+; LMULMAX2-RV32-NEXT:    lui a4, 4080
+; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a4
+; LMULMAX2-RV32-NEXT:    vor.vv v30, v30, v8
+; LMULMAX2-RV32-NEXT:    vor.vv v28, v30, v28
+; LMULMAX2-RV32-NEXT:    addi a5, zero, 255
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.v.x v30, a5
+; LMULMAX2-RV32-NEXT:    vmerge.vim v30, v30, 0, v0
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsll.vv v28, v28, v16
-; LMULMAX2-RV32-NEXT:    vor.vv v28, v28, v8
-; LMULMAX2-RV32-NEXT:    vor.vv v28, v28, v30
-; LMULMAX2-RV32-NEXT:    vor.vv v28, v28, v12
+; LMULMAX2-RV32-NEXT:    vsll.vi v8, v26, 8
+; LMULMAX2-RV32-NEXT:    vand.vv v30, v8, v30
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.v.x v8, a3
+; LMULMAX2-RV32-NEXT:    vmerge.vim v8, v8, 0, v0
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vsll.vi v10, v26, 24
+; LMULMAX2-RV32-NEXT:    vand.vv v8, v10, v8
+; LMULMAX2-RV32-NEXT:    vor.vv v30, v8, v30
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.v.x v8, a4
+; LMULMAX2-RV32-NEXT:    vmerge.vim v8, v8, 0, v0
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT:    vsll.vx v10, v26, a2
+; LMULMAX2-RV32-NEXT:    vand.vv v8, v10, v8
+; LMULMAX2-RV32-NEXT:    vsll.vx v26, v26, a6
+; LMULMAX2-RV32-NEXT:    vor.vv v26, v26, v8
+; LMULMAX2-RV32-NEXT:    vor.vv v26, v26, v30
+; LMULMAX2-RV32-NEXT:    vor.vv v26, v26, v28
 ; LMULMAX2-RV32-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v30, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v30, v28, v30
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v8, v26, 4, v0
+; LMULMAX2-RV32-NEXT:    vmv.v.x v28, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsll.vv v30, v30, v8
+; LMULMAX2-RV32-NEXT:    vand.vv v28, v26, v28
+; LMULMAX2-RV32-NEXT:    vsll.vi v28, v28, 4
 ; LMULMAX2-RV32-NEXT:    lui a1, 986895
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 240
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT:    vmv.v.x v30, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v28, v28, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vv v28, v28, v8
-; LMULMAX2-RV32-NEXT:    vor.vv v28, v28, v30
+; LMULMAX2-RV32-NEXT:    vand.vv v26, v26, v30
+; LMULMAX2-RV32-NEXT:    vsrl.vi v26, v26, 4
+; LMULMAX2-RV32-NEXT:    vor.vv v26, v26, v28
 ; LMULMAX2-RV32-NEXT:    lui a1, 209715
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v30, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v30, v28, v30
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v8, v26, 2, v0
+; LMULMAX2-RV32-NEXT:    vmv.v.x v28, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsll.vv v30, v30, v8
+; LMULMAX2-RV32-NEXT:    vand.vv v28, v26, v28
+; LMULMAX2-RV32-NEXT:    vsll.vi v28, v28, 2
 ; LMULMAX2-RV32-NEXT:    lui a1, 838861
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, -820
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT:    vmv.v.x v30, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v28, v28, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vv v28, v28, v8
-; LMULMAX2-RV32-NEXT:    vor.vv v28, v28, v30
+; LMULMAX2-RV32-NEXT:    vand.vv v26, v26, v30
+; LMULMAX2-RV32-NEXT:    vsrl.vi v26, v26, 2
+; LMULMAX2-RV32-NEXT:    vor.vv v26, v26, v28
 ; LMULMAX2-RV32-NEXT:    lui a1, 349525
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v30, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v30, v28, v30
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v26, v26, 1, v0
+; LMULMAX2-RV32-NEXT:    vmv.v.x v28, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsll.vv v30, v30, v26
+; LMULMAX2-RV32-NEXT:    vand.vv v28, v26, v28
+; LMULMAX2-RV32-NEXT:    vsll.vi v28, v28, 1
 ; LMULMAX2-RV32-NEXT:    lui a1, 699051
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, -1366
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v8, a1
+; LMULMAX2-RV32-NEXT:    vmv.v.x v30, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v28, v28, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vv v26, v28, v26
-; LMULMAX2-RV32-NEXT:    vor.vv v26, v26, v30
+; LMULMAX2-RV32-NEXT:    vand.vv v26, v26, v30
+; LMULMAX2-RV32-NEXT:    vsrl.vi v26, v26, 1
+; LMULMAX2-RV32-NEXT:    vor.vv v26, v26, v28
 ; LMULMAX2-RV32-NEXT:    vse64.v v26, (a0)
 ; LMULMAX2-RV32-NEXT:    ret
 ;
@@ -1516,167 +1432,139 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-LABEL: bitreverse_v4i64:
 ; LMULMAX1-RV32:       # %bb.0:
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vle64.v v25, (a0)
 ; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v13, (a1)
-; LMULMAX1-RV32-NEXT:    addi a2, zero, 5
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 1, e8,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a2
+; LMULMAX1-RV32-NEXT:    vle64.v v30, (a1)
+; LMULMAX1-RV32-NEXT:    vle64.v v25, (a0)
+; LMULMAX1-RV32-NEXT:    addi a6, zero, 56
+; LMULMAX1-RV32-NEXT:    vsrl.vx v26, v30, a6
+; LMULMAX1-RV32-NEXT:    addi a7, zero, 40
+; LMULMAX1-RV32-NEXT:    vsrl.vx v27, v30, a7
+; LMULMAX1-RV32-NEXT:    lui a4, 16
+; LMULMAX1-RV32-NEXT:    addi a4, a4, -256
+; LMULMAX1-RV32-NEXT:    vand.vx v27, v27, a4
+; LMULMAX1-RV32-NEXT:    vor.vv v27, v27, v26
+; LMULMAX1-RV32-NEXT:    addi a5, zero, 5
+; LMULMAX1-RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a5
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.i v30, 0
-; LMULMAX1-RV32-NEXT:    addi a2, zero, 24
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v26, v30, a2, v0
+; LMULMAX1-RV32-NEXT:    vmv.v.i v26, 0
+; LMULMAX1-RV32-NEXT:    lui a2, 1044480
+; LMULMAX1-RV32-NEXT:    vmerge.vxm v26, v26, a2, v0
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v27, v13, v26
-; LMULMAX1-RV32-NEXT:    lui a2, 4080
+; LMULMAX1-RV32-NEXT:    vsrl.vi v28, v30, 8
+; LMULMAX1-RV32-NEXT:    vand.vv v28, v28, v26
+; LMULMAX1-RV32-NEXT:    vsrl.vi v29, v30, 24
+; LMULMAX1-RV32-NEXT:    lui a5, 4080
+; LMULMAX1-RV32-NEXT:    vand.vx v29, v29, a5
+; LMULMAX1-RV32-NEXT:    vor.vv v28, v28, v29
+; LMULMAX1-RV32-NEXT:    vor.vv v31, v28, v27
+; LMULMAX1-RV32-NEXT:    addi a2, zero, 255
 ; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v28, v30, a2, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v29, v27, v28
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vim v27, v30, 8, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v13, v27
-; LMULMAX1-RV32-NEXT:    lui a3, 1044480
-; LMULMAX1-RV32-NEXT:    vsetivli a4, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v31, v30, a3, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v31
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v8, v29
-; LMULMAX1-RV32-NEXT:    addi a3, zero, 40
-; LMULMAX1-RV32-NEXT:    vsetivli a4, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v29, v30, a3, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v13, v29
-; LMULMAX1-RV32-NEXT:    lui a3, 16
-; LMULMAX1-RV32-NEXT:    addi a3, a3, -256
-; LMULMAX1-RV32-NEXT:    vsetivli a4, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v9, v30, a3, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a4, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v11, v8, v9
-; LMULMAX1-RV32-NEXT:    addi a4, zero, 56
-; LMULMAX1-RV32-NEXT:    vsetivli a5, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v8, v30, a4, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a4, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v12, v13, v8
-; LMULMAX1-RV32-NEXT:    vor.vv v11, v11, v12
-; LMULMAX1-RV32-NEXT:    vor.vv v14, v10, v11
-; LMULMAX1-RV32-NEXT:    vsll.vv v11, v13, v27
-; LMULMAX1-RV32-NEXT:    addi a4, zero, 255
-; LMULMAX1-RV32-NEXT:    vsetivli a5, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v10, a4
-; LMULMAX1-RV32-NEXT:    vmerge.vim v10, v10, 0, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a4, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v12, v11, v10
-; LMULMAX1-RV32-NEXT:    vsll.vv v15, v13, v26
-; LMULMAX1-RV32-NEXT:    vsetivli a4, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v11, a3
-; LMULMAX1-RV32-NEXT:    vmerge.vim v11, v11, 0, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v15, v15, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v15, v15, v12
-; LMULMAX1-RV32-NEXT:    vsll.vv v16, v13, v29
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v12, a2
-; LMULMAX1-RV32-NEXT:    vmerge.vim v12, v12, 0, v0
+; LMULMAX1-RV32-NEXT:    vmv.v.x v27, a2
+; LMULMAX1-RV32-NEXT:    vmerge.vim v27, v27, 0, v0
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v16, v16, v12
-; LMULMAX1-RV32-NEXT:    vsll.vv v13, v13, v8
-; LMULMAX1-RV32-NEXT:    vor.vv v13, v13, v16
-; LMULMAX1-RV32-NEXT:    vor.vv v13, v13, v15
-; LMULMAX1-RV32-NEXT:    vor.vv v16, v13, v14
+; LMULMAX1-RV32-NEXT:    vsll.vi v28, v30, 8
+; LMULMAX1-RV32-NEXT:    vand.vv v29, v28, v27
+; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.v.x v28, a4
+; LMULMAX1-RV32-NEXT:    vmerge.vim v28, v28, 0, v0
+; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vsll.vi v8, v30, 24
+; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v28
+; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v29
+; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.v.x v29, a5
+; LMULMAX1-RV32-NEXT:    vmerge.vim v29, v29, 0, v0
+; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vsll.vx v9, v30, a7
+; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v29
+; LMULMAX1-RV32-NEXT:    vsll.vx v30, v30, a6
+; LMULMAX1-RV32-NEXT:    vor.vv v30, v30, v9
+; LMULMAX1-RV32-NEXT:    vor.vv v30, v30, v8
+; LMULMAX1-RV32-NEXT:    vor.vv v31, v30, v31
 ; LMULMAX1-RV32-NEXT:    lui a2, 61681
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, -241
 ; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v13, a2
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v15, v16, v13
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vim v14, v30, 4, v0
+; LMULMAX1-RV32-NEXT:    vmv.v.x v30, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsll.vv v17, v15, v14
+; LMULMAX1-RV32-NEXT:    vand.vv v8, v31, v30
+; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 4
 ; LMULMAX1-RV32-NEXT:    lui a2, 986895
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, 240
 ; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v15, a2
+; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v16, v16, v15
-; LMULMAX1-RV32-NEXT:    vsrl.vv v16, v16, v14
-; LMULMAX1-RV32-NEXT:    vor.vv v18, v16, v17
+; LMULMAX1-RV32-NEXT:    vand.vv v31, v31, v9
+; LMULMAX1-RV32-NEXT:    vsrl.vi v31, v31, 4
+; LMULMAX1-RV32-NEXT:    vor.vv v31, v31, v8
 ; LMULMAX1-RV32-NEXT:    lui a2, 209715
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, 819
 ; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v16, a2
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v19, v18, v16
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vim v17, v30, 2, v0
+; LMULMAX1-RV32-NEXT:    vmv.v.x v8, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsll.vv v19, v19, v17
+; LMULMAX1-RV32-NEXT:    vand.vv v10, v31, v8
+; LMULMAX1-RV32-NEXT:    vsll.vi v10, v10, 2
 ; LMULMAX1-RV32-NEXT:    lui a2, 838861
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, -820
 ; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v20, a2
+; LMULMAX1-RV32-NEXT:    vmv.v.x v11, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v18, v18, v20
-; LMULMAX1-RV32-NEXT:    vsrl.vv v18, v18, v17
-; LMULMAX1-RV32-NEXT:    vor.vv v18, v18, v19
+; LMULMAX1-RV32-NEXT:    vand.vv v31, v31, v11
+; LMULMAX1-RV32-NEXT:    vsrl.vi v31, v31, 2
+; LMULMAX1-RV32-NEXT:    vor.vv v31, v31, v10
 ; LMULMAX1-RV32-NEXT:    lui a2, 349525
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, 1365
 ; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v19, a2
+; LMULMAX1-RV32-NEXT:    vmv.v.x v10, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v21, v18, v19
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vim v30, v30, 1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsll.vv v21, v21, v30
+; LMULMAX1-RV32-NEXT:    vand.vv v12, v31, v10
+; LMULMAX1-RV32-NEXT:    vsll.vi v12, v12, 1
 ; LMULMAX1-RV32-NEXT:    lui a2, 699051
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, -1366
 ; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v22, a2
+; LMULMAX1-RV32-NEXT:    vmv.v.x v13, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v18, v18, v22
-; LMULMAX1-RV32-NEXT:    vsrl.vv v18, v18, v30
-; LMULMAX1-RV32-NEXT:    vor.vv v18, v18, v21
-; LMULMAX1-RV32-NEXT:    vsrl.vv v21, v25, v26
-; LMULMAX1-RV32-NEXT:    vand.vv v28, v21, v28
-; LMULMAX1-RV32-NEXT:    vsrl.vv v21, v25, v27
-; LMULMAX1-RV32-NEXT:    vand.vv v31, v21, v31
-; LMULMAX1-RV32-NEXT:    vor.vv v28, v31, v28
-; LMULMAX1-RV32-NEXT:    vsrl.vv v31, v25, v29
-; LMULMAX1-RV32-NEXT:    vand.vv v31, v31, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v25, v8
-; LMULMAX1-RV32-NEXT:    vor.vv v31, v31, v9
-; LMULMAX1-RV32-NEXT:    vor.vv v28, v28, v31
-; LMULMAX1-RV32-NEXT:    vsll.vv v27, v25, v27
-; LMULMAX1-RV32-NEXT:    vand.vv v27, v27, v10
-; LMULMAX1-RV32-NEXT:    vsll.vv v26, v25, v26
-; LMULMAX1-RV32-NEXT:    vand.vv v26, v26, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v26, v26, v27
-; LMULMAX1-RV32-NEXT:    vsll.vv v27, v25, v29
-; LMULMAX1-RV32-NEXT:    vand.vv v27, v27, v12
-; LMULMAX1-RV32-NEXT:    vsll.vv v25, v25, v8
+; LMULMAX1-RV32-NEXT:    vand.vv v31, v31, v13
+; LMULMAX1-RV32-NEXT:    vsrl.vi v31, v31, 1
+; LMULMAX1-RV32-NEXT:    vor.vv v31, v31, v12
+; LMULMAX1-RV32-NEXT:    vsrl.vx v12, v25, a6
+; LMULMAX1-RV32-NEXT:    vsrl.vx v14, v25, a7
+; LMULMAX1-RV32-NEXT:    vand.vx v14, v14, a4
+; LMULMAX1-RV32-NEXT:    vor.vv v12, v14, v12
+; LMULMAX1-RV32-NEXT:    vsrl.vi v14, v25, 8
+; LMULMAX1-RV32-NEXT:    vand.vv v26, v14, v26
+; LMULMAX1-RV32-NEXT:    vsrl.vi v14, v25, 24
+; LMULMAX1-RV32-NEXT:    vand.vx v14, v14, a5
+; LMULMAX1-RV32-NEXT:    vor.vv v26, v26, v14
+; LMULMAX1-RV32-NEXT:    vor.vv v26, v26, v12
+; LMULMAX1-RV32-NEXT:    vsll.vi v12, v25, 8
+; LMULMAX1-RV32-NEXT:    vand.vv v27, v12, v27
+; LMULMAX1-RV32-NEXT:    vsll.vi v12, v25, 24
+; LMULMAX1-RV32-NEXT:    vand.vv v28, v12, v28
+; LMULMAX1-RV32-NEXT:    vor.vv v27, v28, v27
+; LMULMAX1-RV32-NEXT:    vsll.vx v28, v25, a7
+; LMULMAX1-RV32-NEXT:    vand.vv v28, v28, v29
+; LMULMAX1-RV32-NEXT:    vsll.vx v25, v25, a6
+; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v28
 ; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v27
 ; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v28
-; LMULMAX1-RV32-NEXT:    vand.vv v26, v25, v13
-; LMULMAX1-RV32-NEXT:    vsll.vv v26, v26, v14
-; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v15
-; LMULMAX1-RV32-NEXT:    vsrl.vv v25, v25, v14
+; LMULMAX1-RV32-NEXT:    vand.vv v26, v25, v30
+; LMULMAX1-RV32-NEXT:    vsll.vi v26, v26, 4
+; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v9
+; LMULMAX1-RV32-NEXT:    vsrl.vi v25, v25, 4
 ; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT:    vand.vv v26, v25, v16
-; LMULMAX1-RV32-NEXT:    vsll.vv v26, v26, v17
-; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v20
-; LMULMAX1-RV32-NEXT:    vsrl.vv v25, v25, v17
+; LMULMAX1-RV32-NEXT:    vand.vv v26, v25, v8
+; LMULMAX1-RV32-NEXT:    vsll.vi v26, v26, 2
+; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v11
+; LMULMAX1-RV32-NEXT:    vsrl.vi v25, v25, 2
 ; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT:    vand.vv v26, v25, v19
-; LMULMAX1-RV32-NEXT:    vsll.vv v26, v26, v30
-; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v22
-; LMULMAX1-RV32-NEXT:    vsrl.vv v25, v25, v30
+; LMULMAX1-RV32-NEXT:    vand.vv v26, v25, v10
+; LMULMAX1-RV32-NEXT:    vsll.vi v26, v26, 1
+; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v13
+; LMULMAX1-RV32-NEXT:    vsrl.vi v25, v25, 1
 ; LMULMAX1-RV32-NEXT:    vor.vv v25, v25, v26
 ; LMULMAX1-RV32-NEXT:    vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v18, (a1)
+; LMULMAX1-RV32-NEXT:    vse64.v v31, (a1)
 ; LMULMAX1-RV32-NEXT:    ret
 ;
 ; LMULMAX1-RV64-LABEL: bitreverse_v4i64:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index 09b81e0d2fa7..9678f8ffb404 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -264,55 +264,39 @@ define void @ctpop_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV32:       # %bb.0:
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX2-RV32-NEXT:    vle64.v v25, (a0)
-; LMULMAX2-RV32-NEXT:    addi a1, zero, 5
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.i v26, 0
-; LMULMAX2-RV32-NEXT:    vmerge.vim v27, v26, 1, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v27, v25, v27
+; LMULMAX2-RV32-NEXT:    vsrl.vi v26, v25, 1
 ; LMULMAX2-RV32-NEXT:    lui a1, 349525
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v28, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v27, v27, v28
-; LMULMAX2-RV32-NEXT:    vsub.vv v25, v25, v27
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v27, v26, 2, v0
+; LMULMAX2-RV32-NEXT:    vmv.v.x v27, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v27, v25, v27
+; LMULMAX2-RV32-NEXT:    vand.vv v26, v26, v27
+; LMULMAX2-RV32-NEXT:    vsub.vv v25, v25, v26
 ; LMULMAX2-RV32-NEXT:    lui a1, 209715
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v28, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v27, v27, v28
-; LMULMAX2-RV32-NEXT:    vand.vv v25, v25, v28
-; LMULMAX2-RV32-NEXT:    vadd.vv v25, v25, v27
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v27, v26, 4, v0
+; LMULMAX2-RV32-NEXT:    vmv.v.x v26, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v27, v25, v27
-; LMULMAX2-RV32-NEXT:    vadd.vv v25, v25, v27
+; LMULMAX2-RV32-NEXT:    vand.vv v27, v25, v26
+; LMULMAX2-RV32-NEXT:    vsrl.vi v25, v25, 2
+; LMULMAX2-RV32-NEXT:    vand.vv v25, v25, v26
+; LMULMAX2-RV32-NEXT:    vadd.vv v25, v27, v25
+; LMULMAX2-RV32-NEXT:    vsrl.vi v26, v25, 4
+; LMULMAX2-RV32-NEXT:    vadd.vv v25, v25, v26
 ; LMULMAX2-RV32-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v27, a1
+; LMULMAX2-RV32-NEXT:    vmv.v.x v26, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v25, v25, v27
+; LMULMAX2-RV32-NEXT:    vand.vv v25, v25, v26
 ; LMULMAX2-RV32-NEXT:    lui a1, 4112
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 257
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v27, a1
+; LMULMAX2-RV32-NEXT:    vmv.v.x v26, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmul.vv v25, v25, v27
+; LMULMAX2-RV32-NEXT:    vmul.vv v25, v25, v26
 ; LMULMAX2-RV32-NEXT:    addi a1, zero, 56
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v26, v26, a1, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v25, v25, v26
+; LMULMAX2-RV32-NEXT:    vsrl.vx v25, v25, a1
 ; LMULMAX2-RV32-NEXT:    vse64.v v25, (a0)
 ; LMULMAX2-RV32-NEXT:    ret
 ;
@@ -370,55 +354,39 @@ define void @ctpop_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV32:       # %bb.0:
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX1-RV32-NEXT:    vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, zero, 5
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a1
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.i v26, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v27, v26, 1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v27, v25, v27
+; LMULMAX1-RV32-NEXT:    vsrl.vi v26, v25, 1
 ; LMULMAX1-RV32-NEXT:    lui a1, 349525
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, 1365
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v28, a1
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v27, v27, v28
-; LMULMAX1-RV32-NEXT:    vsub.vv v25, v25, v27
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vim v27, v26, 2, v0
+; LMULMAX1-RV32-NEXT:    vmv.v.x v27, a1
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v27, v25, v27
+; LMULMAX1-RV32-NEXT:    vand.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT:    vsub.vv v25, v25, v26
 ; LMULMAX1-RV32-NEXT:    lui a1, 209715
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, 819
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v28, a1
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v27, v27, v28
-; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v28
-; LMULMAX1-RV32-NEXT:    vadd.vv v25, v25, v27
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vim v27, v26, 4, v0
+; LMULMAX1-RV32-NEXT:    vmv.v.x v26, a1
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v27, v25, v27
-; LMULMAX1-RV32-NEXT:    vadd.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vand.vv v27, v25, v26
+; LMULMAX1-RV32-NEXT:    vsrl.vi v25, v25, 2
+; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT:    vadd.vv v25, v27, v25
+; LMULMAX1-RV32-NEXT:    vsrl.vi v26, v25, 4
+; LMULMAX1-RV32-NEXT:    vadd.vv v25, v25, v26
 ; LMULMAX1-RV32-NEXT:    lui a1, 61681
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, -241
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v27, a1
+; LMULMAX1-RV32-NEXT:    vmv.v.x v26, a1
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v26
 ; LMULMAX1-RV32-NEXT:    lui a1, 4112
 ; LMULMAX1-RV32-NEXT:    addi a1, a1, 257
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v27, a1
+; LMULMAX1-RV32-NEXT:    vmv.v.x v26, a1
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmul.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vmul.vv v25, v25, v26
 ; LMULMAX1-RV32-NEXT:    addi a1, zero, 56
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v26, v26, a1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT:    vsrl.vx v25, v25, a1
 ; LMULMAX1-RV32-NEXT:    vse64.v v25, (a0)
 ; LMULMAX1-RV32-NEXT:    ret
 ;
@@ -831,56 +799,40 @@ define void @ctpop_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV32-LABEL: ctpop_v4i64:
 ; LMULMAX2-RV32:       # %bb.0:
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vle64.v v28, (a0)
-; LMULMAX2-RV32-NEXT:    addi a1, zero, 85
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.i v26, 0
-; LMULMAX2-RV32-NEXT:    vmerge.vim v30, v26, 1, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v30, v28, v30
+; LMULMAX2-RV32-NEXT:    vle64.v v26, (a0)
+; LMULMAX2-RV32-NEXT:    vsrl.vi v28, v26, 1
 ; LMULMAX2-RV32-NEXT:    lui a1, 349525
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v8, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v30, v30, v8
-; LMULMAX2-RV32-NEXT:    vsub.vv v28, v28, v30
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v30, v26, 2, v0
+; LMULMAX2-RV32-NEXT:    vmv.v.x v30, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v30, v28, v30
+; LMULMAX2-RV32-NEXT:    vand.vv v28, v28, v30
+; LMULMAX2-RV32-NEXT:    vsub.vv v26, v26, v28
 ; LMULMAX2-RV32-NEXT:    lui a1, 209715
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v8, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v30, v30, v8
-; LMULMAX2-RV32-NEXT:    vand.vv v28, v28, v8
-; LMULMAX2-RV32-NEXT:    vadd.vv v28, v28, v30
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v30, v26, 4, v0
+; LMULMAX2-RV32-NEXT:    vmv.v.x v28, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v30, v28, v30
-; LMULMAX2-RV32-NEXT:    vadd.vv v28, v28, v30
+; LMULMAX2-RV32-NEXT:    vand.vv v30, v26, v28
+; LMULMAX2-RV32-NEXT:    vsrl.vi v26, v26, 2
+; LMULMAX2-RV32-NEXT:    vand.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT:    vadd.vv v26, v30, v26
+; LMULMAX2-RV32-NEXT:    vsrl.vi v28, v26, 4
+; LMULMAX2-RV32-NEXT:    vadd.vv v26, v26, v28
 ; LMULMAX2-RV32-NEXT:    lui a1, 61681
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v30, a1
+; LMULMAX2-RV32-NEXT:    vmv.v.x v28, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vand.vv v28, v28, v30
+; LMULMAX2-RV32-NEXT:    vand.vv v26, v26, v28
 ; LMULMAX2-RV32-NEXT:    lui a1, 4112
 ; LMULMAX2-RV32-NEXT:    addi a1, a1, 257
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v30, a1
+; LMULMAX2-RV32-NEXT:    vmv.v.x v28, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmul.vv v28, v28, v30
+; LMULMAX2-RV32-NEXT:    vmul.vv v26, v26, v28
 ; LMULMAX2-RV32-NEXT:    addi a1, zero, 56
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v26, v26, a1, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v26, v28, v26
+; LMULMAX2-RV32-NEXT:    vsrl.vx v26, v26, a1
 ; LMULMAX2-RV32-NEXT:    vse64.v v26, (a0)
 ; LMULMAX2-RV32-NEXT:    ret
 ;
@@ -937,72 +889,56 @@ define void @ctpop_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-LABEL: ctpop_v4i64:
 ; LMULMAX1-RV32:       # %bb.0:
 ; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vle64.v v25, (a0)
 ; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v26, (a1)
-; LMULMAX1-RV32-NEXT:    addi a2, zero, 5
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 1, e8,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a2
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.i v27, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v28, v27, 1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v29, v26, v28
+; LMULMAX1-RV32-NEXT:    vle64.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    vle64.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    vsrl.vi v27, v25, 1
 ; LMULMAX1-RV32-NEXT:    lui a2, 349525
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, 1365
 ; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v30, a2
+; LMULMAX1-RV32-NEXT:    vmv.v.x v28, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v29, v29, v30
-; LMULMAX1-RV32-NEXT:    vsub.vv v26, v26, v29
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vim v29, v27, 2, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v31, v26, v29
+; LMULMAX1-RV32-NEXT:    vand.vv v27, v27, v28
+; LMULMAX1-RV32-NEXT:    vsub.vv v25, v25, v27
 ; LMULMAX1-RV32-NEXT:    lui a2, 209715
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, 819
 ; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v8, a2
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v31, v31, v8
-; LMULMAX1-RV32-NEXT:    vand.vv v26, v26, v8
-; LMULMAX1-RV32-NEXT:    vadd.vv v26, v26, v31
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vim v31, v27, 4, v0
+; LMULMAX1-RV32-NEXT:    vmv.v.x v27, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v26, v31
-; LMULMAX1-RV32-NEXT:    vadd.vv v26, v26, v9
+; LMULMAX1-RV32-NEXT:    vand.vv v29, v25, v27
+; LMULMAX1-RV32-NEXT:    vsrl.vi v25, v25, 2
+; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vadd.vv v25, v29, v25
+; LMULMAX1-RV32-NEXT:    vsrl.vi v29, v25, 4
+; LMULMAX1-RV32-NEXT:    vadd.vv v25, v25, v29
 ; LMULMAX1-RV32-NEXT:    lui a2, 61681
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, -241
 ; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a2
+; LMULMAX1-RV32-NEXT:    vmv.v.x v29, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vand.vv v26, v26, v9
+; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v29
 ; LMULMAX1-RV32-NEXT:    lui a2, 4112
 ; LMULMAX1-RV32-NEXT:    addi a2, a2, 257
 ; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v10, a2
+; LMULMAX1-RV32-NEXT:    vmv.v.x v30, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmul.vv v26, v26, v10
+; LMULMAX1-RV32-NEXT:    vmul.vv v25, v25, v30
 ; LMULMAX1-RV32-NEXT:    addi a2, zero, 56
-; LMULMAX1-RV32-NEXT:    vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v27, v27, a2, v0
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsrl.vv v26, v26, v27
-; LMULMAX1-RV32-NEXT:    vsrl.vv v28, v25, v28
-; LMULMAX1-RV32-NEXT:    vand.vv v28, v28, v30
-; LMULMAX1-RV32-NEXT:    vsub.vv v25, v25, v28
-; LMULMAX1-RV32-NEXT:    vsrl.vv v28, v25, v29
-; LMULMAX1-RV32-NEXT:    vand.vv v28, v28, v8
-; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v8
-; LMULMAX1-RV32-NEXT:    vadd.vv v25, v25, v28
-; LMULMAX1-RV32-NEXT:    vsrl.vv v28, v25, v31
-; LMULMAX1-RV32-NEXT:    vadd.vv v25, v25, v28
-; LMULMAX1-RV32-NEXT:    vand.vv v25, v25, v9
-; LMULMAX1-RV32-NEXT:    vmul.vv v25, v25, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vv v25, v25, v27
-; LMULMAX1-RV32-NEXT:    vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v26, (a1)
+; LMULMAX1-RV32-NEXT:    vsrl.vx v25, v25, a2
+; LMULMAX1-RV32-NEXT:    vsrl.vi v31, v26, 1
+; LMULMAX1-RV32-NEXT:    vand.vv v28, v31, v28
+; LMULMAX1-RV32-NEXT:    vsub.vv v26, v26, v28
+; LMULMAX1-RV32-NEXT:    vand.vv v28, v26, v27
+; LMULMAX1-RV32-NEXT:    vsrl.vi v26, v26, 2
+; LMULMAX1-RV32-NEXT:    vand.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT:    vadd.vv v26, v28, v26
+; LMULMAX1-RV32-NEXT:    vsrl.vi v27, v26, 4
+; LMULMAX1-RV32-NEXT:    vadd.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT:    vand.vv v26, v26, v29
+; LMULMAX1-RV32-NEXT:    vmul.vv v26, v26, v30
+; LMULMAX1-RV32-NEXT:    vsrl.vx v26, v26, a2
+; LMULMAX1-RV32-NEXT:    vse64.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    vse64.v v25, (a1)
 ; LMULMAX1-RV32-NEXT:    ret
 ;
 ; LMULMAX1-RV64-LABEL: ctpop_v4i64:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 7abea8116cbe..c90e0d1aacd0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -173,3 +173,139 @@ define void @buildvec_dominant1_optsize_v2i32(<2 x i64>* %x) optsize {
   store <2 x i64> <i64 2049638230412172402, i64 -1>, <2 x i64>* %x
   ret void
 }
+
+define void @buildvec_seq_v8i8_v4i16(<8 x i8>* %x) {
+; CHECK-LABEL: buildvec_seq_v8i8_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 513
+; CHECK-NEXT:    vsetivli a2, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  store <8 x i8> <i8 1, i8 2, i8 1, i8 2, i8 1, i8 2, i8 undef, i8 2>, <8 x i8>* %x
+  ret void
+}
+
+define void @buildvec_seq_v8i8_v2i32(<8 x i8>* %x) {
+; RV32-LABEL: buildvec_seq_v8i8_v2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 48
+; RV32-NEXT:    addi a1, a1, 513
+; RV32-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
+; RV32-NEXT:    vmv.v.x v25, a1
+; RV32-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; RV32-NEXT:    vse8.v v25, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_seq_v8i8_v2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 48
+; RV64-NEXT:    addiw a1, a1, 513
+; RV64-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
+; RV64-NEXT:    vmv.v.x v25, a1
+; RV64-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; RV64-NEXT:    vse8.v v25, (a0)
+; RV64-NEXT:    ret
+  store <8 x i8> <i8 1, i8 2, i8 3, i8 undef, i8 1, i8 2, i8 3, i8 undef>, <8 x i8>* %x
+  ret void
+}
+
+define void @buildvec_seq_v16i8_v2i64(<16 x i8>* %x) {
+; RV32-LABEL: buildvec_seq_v16i8_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI14_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI14_0)
+; RV32-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; RV32-NEXT:    vle8.v v25, (a1)
+; RV32-NEXT:    vse8.v v25, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_seq_v16i8_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 32880
+; RV64-NEXT:    addiw a1, a1, 1541
+; RV64-NEXT:    slli a1, a1, 16
+; RV64-NEXT:    addi a1, a1, 1027
+; RV64-NEXT:    slli a1, a1, 16
+; RV64-NEXT:    addi a1, a1, 513
+; RV64-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
+; RV64-NEXT:    vmv.v.x v25, a1
+; RV64-NEXT:    vsetivli a1, 16, e8,m1,ta,mu
+; RV64-NEXT:    vse8.v v25, (a0)
+; RV64-NEXT:    ret
+  store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <16 x i8>* %x
+  ret void
+}
+
+define void @buildvec_seq2_v16i8_v2i64(<16 x i8>* %x) {
+; RV32-LABEL: buildvec_seq2_v16i8_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 528432
+; RV32-NEXT:    addi a1, a1, 513
+; RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
+; RV32-NEXT:    vmv.v.x v25, a1
+; RV32-NEXT:    vsetivli a1, 16, e8,m1,ta,mu
+; RV32-NEXT:    vse8.v v25, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_seq2_v16i8_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 528432
+; RV64-NEXT:    addiw a1, a1, 513
+; RV64-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
+; RV64-NEXT:    vmv.v.x v25, a1
+; RV64-NEXT:    vsetivli a1, 16, e8,m1,ta,mu
+; RV64-NEXT:    vse8.v v25, (a0)
+; RV64-NEXT:    ret
+  store <16 x i8> <i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1, i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* %x
+  ret void
+}
+
+define void @buildvec_seq_v9i8(<9 x i8>* %x) {
+; RV32-LABEL: buildvec_seq_v9i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a1, zero, 73
+; RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT:    vmv.s.x v0, a1
+; RV32-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; RV32-NEXT:    vmv.v.i v25, 2
+; RV32-NEXT:    vmerge.vim v25, v25, 1, v0
+; RV32-NEXT:    addi a1, zero, 36
+; RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT:    vmv.s.x v0, a1
+; RV32-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; RV32-NEXT:    vmerge.vim v25, v25, 3, v0
+; RV32-NEXT:    vse8.v v25, (a0)
+; RV32-NEXT:    addi a1, zero, 3
+; RV32-NEXT:    sb a1, 8(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_seq_v9i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a1, zero, 3
+; RV64-NEXT:    sb a1, 8(a0)
+; RV64-NEXT:    lui a1, 4104
+; RV64-NEXT:    addiw a1, a1, 385
+; RV64-NEXT:    slli a1, a1, 17
+; RV64-NEXT:    addi a1, a1, 259
+; RV64-NEXT:    slli a1, a1, 16
+; RV64-NEXT:    addi a1, a1, 513
+; RV64-NEXT:    sd a1, 0(a0)
+; RV64-NEXT:    ret
+  store <9 x i8> <i8 1, i8 2, i8 3, i8 1, i8 2, i8 3, i8 1, i8 2, i8 3>, <9 x i8>* %x
+  ret void
+}
+
+define void @buildvec_seq_v4i16_v2i32(<4 x i16>* %x) {
+; CHECK-LABEL: buildvec_seq_v4i16_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, -127
+; CHECK-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  store <4 x i16> <i16 -127, i16 -1, i16 -127, i16 -1>, <4 x i16>* %x
+  ret void
+}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 8c82c1238eac..7ad3a431a6a5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1184,16 +1184,17 @@ define void @mulhs_v4i32(<4 x i32>* %x) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
 ; RV64-NEXT:    vle32.v v25, (a0)
-; RV64-NEXT:    addi a1, zero, 5
-; RV64-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; RV64-NEXT:    vmv.s.x v0, a1
-; RV64-NEXT:    lui a1, 419430
-; RV64-NEXT:    addiw a1, a1, 1639
-; RV64-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
+; RV64-NEXT:    lui a1, 13107
+; RV64-NEXT:    addiw a1, a1, 819
+; RV64-NEXT:    slli a1, a1, 12
+; RV64-NEXT:    addi a1, a1, 973
+; RV64-NEXT:    slli a1, a1, 12
+; RV64-NEXT:    addi a1, a1, -819
+; RV64-NEXT:    slli a1, a1, 13
+; RV64-NEXT:    addi a1, a1, -1639
+; RV64-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
 ; RV64-NEXT:    vmv.v.x v26, a1
-; RV64-NEXT:    lui a1, 629146
-; RV64-NEXT:    addiw a1, a1, -1639
-; RV64-NEXT:    vmerge.vxm v26, v26, a1, v0
+; RV64-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
 ; RV64-NEXT:    vmulh.vv v25, v25, v26
 ; RV64-NEXT:    vsra.vi v25, v25, 1
 ; RV64-NEXT:    vsrl.vi v26, v25, 31
@@ -1229,22 +1230,16 @@ define void @mulhs_v2i64(<2 x i64>* %x) {
 ; RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
 ; RV32-NEXT:    vmul.vv v25, v25, v27
 ; RV32-NEXT:    vadd.vv v25, v26, v25
-; RV32-NEXT:    addi a2, zero, 5
-; RV32-NEXT:    vsetivli a3, 1, e8,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a2
-; RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, 0
 ; RV32-NEXT:    addi a2, zero, 63
-; RV32-NEXT:    vmerge.vxm v27, v26, a2, v0
-; RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; RV32-NEXT:    vsrl.vv v27, v25, v27
+; RV32-NEXT:    vsrl.vx v26, v25, a2
 ; RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v28, a1
+; RV32-NEXT:    vmv.s.x v27, a1
+; RV32-NEXT:    vmv.v.i v28, 0
 ; RV32-NEXT:    vsetivli a1, 3, e32,m1,tu,mu
-; RV32-NEXT:    vslideup.vi v26, v28, 2
+; RV32-NEXT:    vslideup.vi v28, v27, 2
 ; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vsra.vv v25, v25, v26
-; RV32-NEXT:    vadd.vv v25, v25, v27
+; RV32-NEXT:    vsra.vv v25, v25, v28
+; RV32-NEXT:    vadd.vv v25, v25, v26
 ; RV32-NEXT:    vse64.v v25, (a0)
 ; RV32-NEXT:    ret
 ;
@@ -4622,16 +4617,17 @@ define void @mulhs_v8i32(<8 x i32>* %x) {
 ; LMULMAX2-RV64:       # %bb.0:
 ; LMULMAX2-RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; LMULMAX2-RV64-NEXT:    vle32.v v26, (a0)
-; LMULMAX2-RV64-NEXT:    addi a1, zero, 85
-; LMULMAX2-RV64-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; LMULMAX2-RV64-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV64-NEXT:    lui a1, 419430
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1639
-; LMULMAX2-RV64-NEXT:    vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV64-NEXT:    lui a1, 13107
+; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64-NEXT:    slli a1, a1, 12
+; LMULMAX2-RV64-NEXT:    addi a1, a1, 973
+; LMULMAX2-RV64-NEXT:    slli a1, a1, 12
+; LMULMAX2-RV64-NEXT:    addi a1, a1, -819
+; LMULMAX2-RV64-NEXT:    slli a1, a1, 13
+; LMULMAX2-RV64-NEXT:    addi a1, a1, -1639
+; LMULMAX2-RV64-NEXT:    vsetivli a2, 4, e64,m2,ta,mu
 ; LMULMAX2-RV64-NEXT:    vmv.v.x v28, a1
-; LMULMAX2-RV64-NEXT:    lui a1, 629146
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -1639
-; LMULMAX2-RV64-NEXT:    vmerge.vxm v28, v28, a1, v0
+; LMULMAX2-RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; LMULMAX2-RV64-NEXT:    vmulh.vv v26, v26, v28
 ; LMULMAX2-RV64-NEXT:    vsra.vi v26, v26, 1
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v28, v26, 31
@@ -4673,12 +4669,12 @@ define void @mulhs_v8i32(<8 x i32>* %x) {
 ; LMULMAX1-RV64-NEXT:    vle32.v v25, (a0)
 ; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV64-NEXT:    vle32.v v26, (a1)
-; LMULMAX1-RV64-NEXT:    addi a2, zero, 5
-; LMULMAX1-RV64-NEXT:    vsetivli a3, 1, e8,m1,ta,mu
-; LMULMAX1-RV64-NEXT:    vmv.s.x v0, a2
+; LMULMAX1-RV64-NEXT:    addi a2, zero, 3
+; LMULMAX1-RV64-NEXT:    slli a2, a2, 33
+; LMULMAX1-RV64-NEXT:    addi a2, a2, -5
+; LMULMAX1-RV64-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
+; LMULMAX1-RV64-NEXT:    vmv.v.x v27, a2
 ; LMULMAX1-RV64-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV64-NEXT:    vmv.v.i v27, 5
-; LMULMAX1-RV64-NEXT:    vmerge.vim v27, v27, -5, v0
 ; LMULMAX1-RV64-NEXT:    vdiv.vv v26, v26, v27
 ; LMULMAX1-RV64-NEXT:    vdiv.vv v25, v25, v27
 ; LMULMAX1-RV64-NEXT:    vse32.v v25, (a0)
@@ -4715,23 +4711,17 @@ define void @mulhs_v4i64(<4 x i64>* %x) {
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
 ; LMULMAX2-RV32-NEXT:    vmulh.vv v26, v26, v30
 ; LMULMAX2-RV32-NEXT:    vadd.vv v26, v26, v28
-; LMULMAX2-RV32-NEXT:    addi a1, zero, 85
-; LMULMAX2-RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.i v28, 0
 ; LMULMAX2-RV32-NEXT:    addi a1, zero, 63
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v30, v28, a1, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vv v30, v26, v30
+; LMULMAX2-RV32-NEXT:    vsrl.vx v28, v26, a1
 ; LMULMAX2-RV32-NEXT:    addi a1, zero, 68
 ; LMULMAX2-RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
 ; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmerge.vim v28, v28, 1, v0
+; LMULMAX2-RV32-NEXT:    vmv.v.i v30, 0
+; LMULMAX2-RV32-NEXT:    vmerge.vim v30, v30, 1, v0
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vsra.vv v26, v26, v28
-; LMULMAX2-RV32-NEXT:    vadd.vv v26, v26, v30
+; LMULMAX2-RV32-NEXT:    vsra.vv v26, v26, v30
+; LMULMAX2-RV32-NEXT:    vadd.vv v26, v26, v28
 ; LMULMAX2-RV32-NEXT:    vse64.v v26, (a0)
 ; LMULMAX2-RV32-NEXT:    ret
 ;
@@ -5707,28 +5697,13 @@ define void @add_iv_v4i32(<4 x i32>* %x) {
 }
 
 define void @add_iv_v2i64(<2 x i64>* %x) {
-; RV32-LABEL: add_iv_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vle64.v v25, (a0)
-; RV32-NEXT:    addi a1, zero, 5
-; RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, 0
-; RV32-NEXT:    vmerge.vim v26, v26, 1, v0
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vadd.vv v25, v25, v26
-; RV32-NEXT:    vse64.v v25, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: add_iv_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV64-NEXT:    vle64.v v25, (a0)
-; RV64-NEXT:    vadd.vi v25, v25, 1
-; RV64-NEXT:    vse64.v v25, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: add_iv_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vadd.vi v25, v25, 1
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 1, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -5961,28 +5936,13 @@ define void @sub_iv_v4i32(<4 x i32>* %x) {
 }
 
 define void @sub_iv_v2i64(<2 x i64>* %x) {
-; RV32-LABEL: sub_iv_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vle64.v v25, (a0)
-; RV32-NEXT:    addi a1, zero, 5
-; RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, 0
-; RV32-NEXT:    vmerge.vim v26, v26, 1, v0
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vsub.vv v25, v26, v25
-; RV32-NEXT:    vse64.v v25, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: sub_iv_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV64-NEXT:    vle64.v v25, (a0)
-; RV64-NEXT:    vrsub.vi v25, v25, 1
-; RV64-NEXT:    vse64.v v25, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: sub_iv_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vrsub.vi v25, v25, 1
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 1, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6232,28 +6192,13 @@ define void @and_vi_v4i32(<4 x i32>* %x) {
 }
 
 define void @and_vi_v2i64(<2 x i64>* %x) {
-; RV32-LABEL: and_vi_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vle64.v v25, (a0)
-; RV32-NEXT:    addi a1, zero, 5
-; RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, -1
-; RV32-NEXT:    vmerge.vim v26, v26, -2, v0
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vand.vv v25, v25, v26
-; RV32-NEXT:    vse64.v v25, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: and_vi_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV64-NEXT:    vle64.v v25, (a0)
-; RV64-NEXT:    vand.vi v25, v25, -2
-; RV64-NEXT:    vse64.v v25, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: and_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, -2
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 -2, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6311,28 +6256,13 @@ define void @and_iv_v4i32(<4 x i32>* %x) {
 }
 
 define void @and_iv_v2i64(<2 x i64>* %x) {
-; RV32-LABEL: and_iv_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vle64.v v25, (a0)
-; RV32-NEXT:    addi a1, zero, 5
-; RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, 0
-; RV32-NEXT:    vmerge.vim v26, v26, 1, v0
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vand.vv v25, v25, v26
-; RV32-NEXT:    vse64.v v25, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: and_iv_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV64-NEXT:    vle64.v v25, (a0)
-; RV64-NEXT:    vand.vi v25, v25, 1
-; RV64-NEXT:    vse64.v v25, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: and_iv_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 1, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6486,28 +6416,13 @@ define void @or_vi_v4i32(<4 x i32>* %x) {
 }
 
 define void @or_vi_v2i64(<2 x i64>* %x) {
-; RV32-LABEL: or_vi_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vle64.v v25, (a0)
-; RV32-NEXT:    addi a1, zero, 5
-; RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, -1
-; RV32-NEXT:    vmerge.vim v26, v26, -2, v0
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vor.vv v25, v25, v26
-; RV32-NEXT:    vse64.v v25, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: or_vi_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV64-NEXT:    vle64.v v25, (a0)
-; RV64-NEXT:    vor.vi v25, v25, -2
-; RV64-NEXT:    vse64.v v25, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: or_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vor.vi v25, v25, -2
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 -2, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6565,28 +6480,13 @@ define void @or_iv_v4i32(<4 x i32>* %x) {
 }
 
 define void @or_iv_v2i64(<2 x i64>* %x) {
-; RV32-LABEL: or_iv_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vle64.v v25, (a0)
-; RV32-NEXT:    addi a1, zero, 5
-; RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, 0
-; RV32-NEXT:    vmerge.vim v26, v26, 1, v0
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vor.vv v25, v25, v26
-; RV32-NEXT:    vse64.v v25, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: or_iv_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV64-NEXT:    vle64.v v25, (a0)
-; RV64-NEXT:    vor.vi v25, v25, 1
-; RV64-NEXT:    vse64.v v25, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: or_iv_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vor.vi v25, v25, 1
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 1, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6815,28 +6715,13 @@ define void @xor_iv_v4i32(<4 x i32>* %x) {
 }
 
 define void @xor_iv_v2i64(<2 x i64>* %x) {
-; RV32-LABEL: xor_iv_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vle64.v v25, (a0)
-; RV32-NEXT:    addi a1, zero, 5
-; RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, 0
-; RV32-NEXT:    vmerge.vim v26, v26, 1, v0
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vxor.vv v25, v25, v26
-; RV32-NEXT:    vse64.v v25, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: xor_iv_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV64-NEXT:    vle64.v v25, (a0)
-; RV64-NEXT:    vxor.vi v25, v25, 1
-; RV64-NEXT:    vse64.v v25, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: xor_iv_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vxor.vi v25, v25, 1
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 1, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6990,29 +6875,13 @@ define void @lshr_vi_v4i32(<4 x i32>* %x) {
 }
 
 define void @lshr_vi_v2i64(<2 x i64>* %x) {
-; RV32-LABEL: lshr_vi_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vle64.v v25, (a0)
-; RV32-NEXT:    addi a1, zero, 5
-; RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, 0
-; RV32-NEXT:    addi a1, zero, 31
-; RV32-NEXT:    vmerge.vxm v26, v26, a1, v0
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vsrl.vv v25, v25, v26
-; RV32-NEXT:    vse64.v v25, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: lshr_vi_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV64-NEXT:    vle64.v v25, (a0)
-; RV64-NEXT:    vsrl.vi v25, v25, 31
-; RV64-NEXT:    vse64.v v25, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: lshr_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vsrl.vi v25, v25, 31
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 31, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -7118,29 +6987,13 @@ define void @ashr_vi_v4i32(<4 x i32>* %x) {
 }
 
 define void @ashr_vi_v2i64(<2 x i64>* %x) {
-; RV32-LABEL: ashr_vi_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vle64.v v25, (a0)
-; RV32-NEXT:    addi a1, zero, 5
-; RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, 0
-; RV32-NEXT:    addi a1, zero, 31
-; RV32-NEXT:    vmerge.vxm v26, v26, a1, v0
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vsra.vv v25, v25, v26
-; RV32-NEXT:    vse64.v v25, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ashr_vi_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV64-NEXT:    vle64.v v25, (a0)
-; RV64-NEXT:    vsra.vi v25, v25, 31
-; RV64-NEXT:    vse64.v v25, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: ashr_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vsra.vi v25, v25, 31
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 31, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -7246,29 +7099,13 @@ define void @shl_vi_v4i32(<4 x i32>* %x) {
 }
 
 define void @shl_vi_v2i64(<2 x i64>* %x) {
-; RV32-LABEL: shl_vi_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vle64.v v25, (a0)
-; RV32-NEXT:    addi a1, zero, 5
-; RV32-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, 0
-; RV32-NEXT:    addi a1, zero, 31
-; RV32-NEXT:    vmerge.vxm v26, v26, a1, v0
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vsll.vv v25, v25, v26
-; RV32-NEXT:    vse64.v v25, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: shl_vi_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV64-NEXT:    vle64.v v25, (a0)
-; RV64-NEXT:    vsll.vi v25, v25, 31
-; RV64-NEXT:    vse64.v v25, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: shl_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vsll.vi v25, v25, 31
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 31, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -7611,11 +7448,7 @@ define void @mulhu_vx_v2i64(<2 x i64>* %x) {
 ; RV32-NEXT:    vmerge.vxm v26, v26, a1, v0
 ; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT:    vmulhu.vv v25, v25, v26
-; RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, 0
-; RV32-NEXT:    vmerge.vim v26, v26, 1, v0
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vsrl.vv v25, v25, v26
+; RV32-NEXT:    vsrl.vi v25, v25, 1
 ; RV32-NEXT:    vse64.v v25, (a0)
 ; RV32-NEXT:    ret
 ;
@@ -7737,12 +7570,8 @@ define void @mulhs_vx_v2i64(<2 x i64>* %x) {
 ; RV32-NEXT:    vmerge.vxm v26, v26, a1, v0
 ; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT:    vmulh.vv v25, v25, v26
-; RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; RV32-NEXT:    vmv.v.i v26, 0
 ; RV32-NEXT:    addi a1, zero, 63
-; RV32-NEXT:    vmerge.vxm v26, v26, a1, v0
-; RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; RV32-NEXT:    vsrl.vv v26, v25, v26
+; RV32-NEXT:    vsrl.vx v26, v25, a1
 ; RV32-NEXT:    vadd.vv v25, v25, v26
 ; RV32-NEXT:    vse64.v v25, (a0)
 ; RV32-NEXT:    ret

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 32f4c270b8ba..5cad41fed7fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -1035,20 +1035,10 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8 x i1
 define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf8 v28, v8
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -1071,20 +1061,10 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8
 define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf8 v28, v8
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -1132,20 +1112,10 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(i64* %base, <8 x i16> %idxs, <8 x
 define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(i64* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_sext_v8i16_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf4 v28, v8
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -1168,20 +1138,10 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(i64* %base, <8 x i16> %idxs,
 define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(i64* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i16_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf4 v28, v8
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -1228,20 +1188,10 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(i64* %base, <8 x i32> %idxs, <8 x
 define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(i64* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_sext_v8i32_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf2 v28, v8
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -1264,20 +1214,10 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(i64* %base, <8 x i32> %idxs,
 define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(i64* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i32_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf2 v28, v8
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -1300,18 +1240,9 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(i64* %base, <8 x i32> %idxs,
 define <8 x i64> @mgather_baseidx_v8i64(i64* %base, <8 x i64> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v28, 0
-; RV32-NEXT:    vmerge.vim v28, v28, 3, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v8, v28
+; RV32-NEXT:    vsll.vi v28, v8, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -1973,20 +1904,10 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(double* %base, <8 x i8> %idxs, <
 define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(double* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf8 v28, v8
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -2009,20 +1930,10 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(double* %base, <8 x i8> %id
 define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(double* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf8 v28, v8
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -2070,20 +1981,10 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(double* %base, <8 x i16> %idxs,
 define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(double* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_sext_v8i16_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf4 v28, v8
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -2106,20 +2007,10 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(double* %base, <8 x i16> %
 define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(double* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i16_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf4 v28, v8
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -2166,20 +2057,10 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(double* %base, <8 x i32> %idxs,
 define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(double* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_sext_v8i32_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf2 v28, v8
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -2202,20 +2083,10 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(double* %base, <8 x i32> %
 define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(double* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i32_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf2 v28, v8
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
@@ -2238,18 +2109,9 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(double* %base, <8 x i32> %
 define <8 x double> @mgather_baseidx_v8f64(double* %base, <8 x i64> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v28, 0
-; RV32-NEXT:    vmerge.vim v28, v28, 3, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v8, v28
+; RV32-NEXT:    vsll.vi v28, v8, 3
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 4aee4d65147b..58716cf2b6bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -857,20 +857,10 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %i
 define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf8 v28, v12
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v12
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -891,20 +881,10 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i
 define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf8 v28, v12
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v12
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -948,20 +928,10 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16>
 define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf4 v28, v12
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v12
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -982,20 +952,10 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x
 define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf4 v28, v12
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v12
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1038,20 +998,10 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32>
 define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf2 v28, v12
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v12
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1072,20 +1022,10 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x
 define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf2 v28, v12
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v12
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1106,18 +1046,9 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x
 define void @mscatter_baseidx_v8i64(<8 x i64> %val, i64* %base, <8 x i64> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v28, 0
-; RV32-NEXT:    vmerge.vim v28, v28, 3, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v12, v28
+; RV32-NEXT:    vsll.vi v28, v12, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1723,20 +1654,10 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, double* %base, <8 x
 define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf8 v28, v12
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v12
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1757,20 +1678,10 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base,
 define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf8 v28, v12
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v12
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1814,20 +1725,10 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, double* %base, <8 x
 define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf4 v28, v12
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v12
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1848,20 +1749,10 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base,
 define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf4 v28, v12
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v12
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1904,20 +1795,10 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, double* %base, <8 x
 define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf2 v28, v12
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v12
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1938,20 +1819,10 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base,
 define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf2 v28, v12
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 3, v0
-; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v28, v12
+; RV32-NEXT:    vsll.vi v28, v28, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1972,18 +1843,9 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base,
 define void @mscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v25, v0
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli a2, 1, e16,m1,ta,mu
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
-; RV32-NEXT:    vmv.v.i v28, 0
-; RV32-NEXT:    vmerge.vim v28, v28, 3, v0
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT:    vsll.vv v28, v12, v28
+; RV32-NEXT:    vsll.vi v28, v12, 3
 ; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
-; RV32-NEXT:    vmv1r.v v0, v25
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;

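As an aside for readers working through the new buildvec_seq_* tests, the splat immediates they check fall out of straightforward little-endian packing of the small constant elements into one wider element. The following standalone C++ sketch is illustrative only (it is not code from this patch, and the helper name and the element-0-in-the-low-bits layout are assumptions); it reproduces the 513 (0x0201) constant from buildvec_seq_v8i8_v4i16 and the 0x00030201 constant (lui 48 + addi 513 on RV32) from buildvec_seq_v8i8_v2i32:

#include <cassert>
#include <cstdint>
#include <vector>

// Pack the first ViaBits/EltBits entries of Elts (each EltBits wide) into a
// single ViaBits-wide value, with element 0 in the least-significant bits.
static uint64_t packSplatValue(const std::vector<uint64_t> &Elts,
                               unsigned EltBits, unsigned ViaBits) {
  assert(EltBits < 64 && ViaBits <= 64 && ViaBits % EltBits == 0);
  unsigned EltsPerVia = ViaBits / EltBits;
  assert(Elts.size() >= EltsPerVia);
  uint64_t EltMask = (1ull << EltBits) - 1;
  uint64_t Value = 0;
  for (unsigned I = 0; I < EltsPerVia; ++I)
    Value |= (Elts[I] & EltMask) << (I * EltBits);
  return Value;
}

int main() {
  // v8i8 <1,2,1,2,...> viewed as v4i16: every lane is 0x0201 == 513.
  assert(packSplatValue({1, 2}, 8, 16) == 513);
  // v8i8 <1,2,3,undef,...> viewed as v2i32, folding the undef byte to zero:
  // 0x00030201, i.e. (48 << 12) + 513 as materialized by lui/addi on RV32.
  assert(packSplatValue({1, 2, 3, 0}, 8, 32) == 0x00030201);
  return 0;
}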

        

