[llvm] 80b9510 - [RISCV] Correct VL for fixed length masked scatter.

Mon May 10 09:50:35 PDT 2021

Author: Craig Topper
Date: 2021-05-10T09:50:08-07:00
New Revision: 80b9510806cf11c57f2dd87191d3989fc45defa8

URL: https://github.com/llvm/llvm-project/commit/80b9510806cf11c57f2dd87191d3989fc45defa8
DIFF: https://github.com/llvm/llvm-project/commit/80b9510806cf11c57f2dd87191d3989fc45defa8.diff

LOG: [RISCV] Correct VL for fixed length masked scatter.

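We were incorrectly calling getVectorNumElements on a scalable
vector type, which shouldn't be allowed. Doing so gives a warning
on EVT, but not on MVT. Keep VT as the original fixed length type
and hold the scalable container type in a separate ContainerVT
variable instead of overwriting VT.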

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9f2e5a019b4bb..05ceafed7c14c 100644

--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4437,21 +4437,23 @@ SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
   if (VT.isFixedLengthVector()) {
     // We need to use the larger of the value and index type to determine the
     // scalable type to use so we don't increase LMUL for any operand/result.
+    MVT ContainerVT;
     if (VT.bitsGE(IndexVT)) {
-      VT = getContainerForFixedLengthVector(VT);
+      ContainerVT = getContainerForFixedLengthVector(VT);
       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
-                                 VT.getVectorElementCount());
+                                 ContainerVT.getVectorElementCount());
     } else {
       IndexVT = getContainerForFixedLengthVector(IndexVT);
-      VT = MVT::getVectorVT(VT.getVectorElementType(),
-                            IndexVT.getVectorElementCount());
+      ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
+                                     IndexVT.getVectorElementCount());
     }
 
     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
-    Val = convertToScalableVector(VT, Val, DAG, Subtarget);
+    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
 
     if (!IsUnmasked) {
-      MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
+      MVT MaskVT =
+          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
     }
 

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 0b83e6e76f3a2..bd43446ae8502 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -9,7 +9,7 @@ declare void @llvm.masked.scatter.v1i8.v1p0i8(<1 x i8>, <1 x i8*>, i32, <1 x i1>
 define void @mscatter_v1i8(<1 x i8> %val, <1 x i8*> %ptrs, <1 x i1> %m) {
 ; RV32-LABEL: mscatter_v1i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e8,mf4,ta,mu
+; RV32-NEXT:    vsetivli a0, 1, e8,mf4,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -33,7 +33,7 @@ define void @mscatter_v2i8(<2 x i8> %val, <2 x i8*> %ptrs, <2 x i1> %m) {
 ;
 ; RV64-LABEL: mscatter_v2i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 1, e8,mf8,ta,mu
+; RV64-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> %val, <2 x i8*> %ptrs, i32 1, <2 x i1> %m)
@@ -53,7 +53,7 @@ define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x i8*> %ptrs, <2
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a0, 2, e8,mf2,ta,mu
 ; RV64-NEXT:    vnsrl.wi v25, v8, 0
-; RV64-NEXT:    vsetivli a0, 1, e8,mf8,ta,mu
+; RV64-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
 ; RV64-NEXT:    vsoxei64.v v25, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i16> %val to <2 x i8>
@@ -77,7 +77,7 @@ define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x i8*> %ptrs, <2
 ; RV64-NEXT:    vnsrl.wi v25, v8, 0
 ; RV64-NEXT:    vsetivli a0, 2, e8,mf4,ta,mu
 ; RV64-NEXT:    vnsrl.wi v26, v25, 0
-; RV64-NEXT:    vsetivli a0, 1, e8,mf8,ta,mu
+; RV64-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
 ; RV64-NEXT:    vsoxei64.v v26, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i32> %val to <2 x i8>
@@ -106,7 +106,6 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x i8*> %ptrs, <2
 ; RV64-NEXT:    vnsrl.wi v26, v25, 0
 ; RV64-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
 ; RV64-NEXT:    vnsrl.wi v25, v26, 0
-; RV64-NEXT:    vsetivli a0, 1, e8,mf8,ta,mu
 ; RV64-NEXT:    vsoxei64.v v25, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i64> %val to <2 x i8>
@@ -119,13 +118,13 @@ declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32, <4 x i1>
 define void @mscatter_v4i8(<4 x i8> %val, <4 x i8*> %ptrs, <4 x i1> %m) {
 ; RV32-LABEL: mscatter_v4i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e8,mf4,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v4i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e8,mf4,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> %m)
@@ -135,13 +134,13 @@ define void @mscatter_v4i8(<4 x i8> %val, <4 x i8*> %ptrs, <4 x i1> %m) {
 define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x i8*> %ptrs) {
 ; RV32-LABEL: mscatter_truemask_v4i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e8,mf4,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_truemask_v4i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e8,mf4,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10
 ; RV64-NEXT:    ret
   %mhead = insertelement <4 x i1> undef, i1 1, i32 0
@@ -167,13 +166,13 @@ declare void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8>, <8 x i8*>, i32, <8 x i1>
 define void @mscatter_v8i8(<8 x i8> %val, <8 x i8*> %ptrs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_v8i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 4, e8,mf2,ta,mu
+; RV32-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v8i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 4, e8,mf2,ta,mu
+; RV64-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %val, <8 x i8*> %ptrs, i32 1, <8 x i1> %m)
@@ -185,7 +184,7 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, i8* %base, <8 x i8> %idxs, <8
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf4 v26, v9
-; RV32-NEXT:    vsetivli a1, 4, e8,mf2,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e8,mf2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -193,7 +192,7 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, i8* %base, <8 x i8> %idxs, <8
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v9
-; RV64-NEXT:    vsetivli a1, 4, e8,mf2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e8,mf2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i8, i8* %base, <8 x i8> %idxs
@@ -206,7 +205,7 @@ declare void @llvm.masked.scatter.v1i16.v1p0i16(<1 x i16>, <1 x i16*>, i32, <1 x
 define void @mscatter_v1i16(<1 x i16> %val, <1 x i16*> %ptrs, <1 x i1> %m) {
 ; RV32-LABEL: mscatter_v1i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e16,mf2,ta,mu
+; RV32-NEXT:    vsetivli a0, 1, e16,mf2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -230,7 +229,7 @@ define void @mscatter_v2i16(<2 x i16> %val, <2 x i16*> %ptrs, <2 x i1> %m) {
 ;
 ; RV64-LABEL: mscatter_v2i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 1, e16,mf4,ta,mu
+; RV64-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> %val, <2 x i16*> %ptrs, i32 2, <2 x i1> %m)
@@ -249,7 +248,7 @@ define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x i16*> %ptrs, <
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a0, 2, e16,mf2,ta,mu
 ; RV64-NEXT:    vnsrl.wi v25, v8, 0
-; RV64-NEXT:    vsetivli a0, 1, e16,mf4,ta,mu
+; RV64-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v25, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i32> %val to <2 x i16>
@@ -274,7 +273,6 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x i16*> %ptrs, <
 ; RV64-NEXT:    vnsrl.wi v25, v8, 0
 ; RV64-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; RV64-NEXT:    vnsrl.wi v26, v25, 0
-; RV64-NEXT:    vsetivli a0, 1, e16,mf4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v26, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i64> %val to <2 x i16>
@@ -287,13 +285,13 @@ declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32, <4 x
 define void @mscatter_v4i16(<4 x i16> %val, <4 x i16*> %ptrs, <4 x i1> %m) {
 ; RV32-LABEL: mscatter_v4i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e16,mf2,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v4i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e16,mf2,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %val, <4 x i16*> %ptrs, i32 2, <4 x i1> %m)
@@ -303,13 +301,13 @@ define void @mscatter_v4i16(<4 x i16> %val, <4 x i16*> %ptrs, <4 x i1> %m) {
 define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x i16*> %ptrs) {
 ; RV32-LABEL: mscatter_truemask_v4i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e16,mf2,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_truemask_v4i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e16,mf2,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10
 ; RV64-NEXT:    ret
   %mhead = insertelement <4 x i1> undef, i1 1, i32 0
@@ -335,13 +333,13 @@ declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x
 define void @mscatter_v8i16(<8 x i16> %val, <8 x i16*> %ptrs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_v8i16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 4, e16,m1,ta,mu
+; RV32-NEXT:    vsetivli a0, 8, e16,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v8i16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 4, e16,m1,ta,mu
+; RV64-NEXT:    vsetivli a0, 8, e16,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %val, <8 x i16*> %ptrs, i32 2, <8 x i1> %m)
@@ -354,7 +352,7 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i8> %i
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf4 v26, v9
 ; RV32-NEXT:    vsll.vi v26, v26, 1
-; RV32-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -363,7 +361,7 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i8> %i
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v9
 ; RV64-NEXT:    vsll.vi v28, v28, 1
-; RV64-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, i16* %base, <8 x i8> %idxs
@@ -377,7 +375,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf4 v26, v9
 ; RV32-NEXT:    vsll.vi v26, v26, 1
-; RV32-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -386,7 +384,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v9
 ; RV64-NEXT:    vsll.vi v28, v28, 1
-; RV64-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i8> %idxs to <8 x i16>
@@ -401,7 +399,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vzext.vf4 v26, v9
 ; RV32-NEXT:    vsll.vi v26, v26, 1
-; RV32-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -410,7 +408,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vzext.vf8 v28, v9
 ; RV64-NEXT:    vsll.vi v28, v28, 1
-; RV64-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i8> %idxs to <8 x i16>
@@ -425,7 +423,7 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, i16* %base, <8 x i16> %idxs,
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf2 v26, v9
 ; RV32-NEXT:    vsll.vi v26, v26, 1
-; RV32-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -434,7 +432,7 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, i16* %base, <8 x i16> %idxs,
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf4 v28, v9
 ; RV64-NEXT:    vsll.vi v28, v28, 1
-; RV64-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> %idxs
@@ -447,7 +445,7 @@ declare void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32>, <1 x i32*>, i32, <1 x
 define void @mscatter_v1i32(<1 x i32> %val, <1 x i32*> %ptrs, <1 x i1> %m) {
 ; RV32-LABEL: mscatter_v1i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e32,m1,ta,mu
+; RV32-NEXT:    vsetivli a0, 1, e32,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -471,7 +469,7 @@ define void @mscatter_v2i32(<2 x i32> %val, <2 x i32*> %ptrs, <2 x i1> %m) {
 ;
 ; RV64-LABEL: mscatter_v2i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 1, e32,mf2,ta,mu
+; RV64-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %val, <2 x i32*> %ptrs, i32 4, <2 x i1> %m)
@@ -491,7 +489,6 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x i32*> %ptrs, <
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; RV64-NEXT:    vnsrl.wi v25, v8, 0
-; RV64-NEXT:    vsetivli a0, 1, e32,mf2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v25, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i64> %val to <2 x i32>
@@ -504,13 +501,13 @@ declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x
 define void @mscatter_v4i32(<4 x i32> %val, <4 x i32*> %ptrs, <4 x i1> %m) {
 ; RV32-LABEL: mscatter_v4i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e32,m1,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v4i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e32,m1,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 4, <4 x i1> %m)
@@ -520,13 +517,13 @@ define void @mscatter_v4i32(<4 x i32> %val, <4 x i32*> %ptrs, <4 x i1> %m) {
 define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x i32*> %ptrs) {
 ; RV32-LABEL: mscatter_truemask_v4i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e32,m1,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_truemask_v4i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e32,m1,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10
 ; RV64-NEXT:    ret
   %mhead = insertelement <4 x i1> undef, i1 1, i32 0
@@ -552,13 +549,13 @@ declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x
 define void @mscatter_v8i32(<8 x i32> %val, <8 x i32*> %ptrs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_v8i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 4, e32,m2,ta,mu
+; RV32-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v8i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %val, <8 x i32*> %ptrs, i32 4, <8 x i1> %m)
@@ -571,7 +568,6 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i8> %i
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf4 v26, v10
 ; RV32-NEXT:    vsll.vi v26, v26, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -580,7 +576,7 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i8> %i
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, i32* %base, <8 x i8> %idxs
@@ -594,7 +590,6 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf4 v26, v10
 ; RV32-NEXT:    vsll.vi v26, v26, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -603,7 +598,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i8> %idxs to <8 x i32>
@@ -618,7 +613,6 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vzext.vf4 v26, v10
 ; RV32-NEXT:    vsll.vi v26, v26, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -627,7 +621,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vzext.vf8 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i8> %idxs to <8 x i32>
@@ -642,7 +636,6 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x i16>
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf2 v26, v10
 ; RV32-NEXT:    vsll.vi v26, v26, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -651,7 +644,7 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x i16>
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf4 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, i32* %base, <8 x i16> %idxs
@@ -665,7 +658,6 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf2 v26, v10
 ; RV32-NEXT:    vsll.vi v26, v26, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -674,7 +666,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf4 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i16> %idxs to <8 x i32>
@@ -689,7 +681,6 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vzext.vf2 v26, v10
 ; RV32-NEXT:    vsll.vi v26, v26, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -698,7 +689,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vzext.vf4 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i16> %idxs to <8 x i32>
@@ -712,7 +703,6 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, i32* %base, <8 x i32> %idxs,
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsll.vi v26, v10, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -721,7 +711,7 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, i32* %base, <8 x i32> %idxs,
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf2 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i32, i32* %base, <8 x i32> %idxs
@@ -752,13 +742,13 @@ declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64>, <2 x i64*>, i32, <2 x
 define void @mscatter_v2i64(<2 x i64> %val, <2 x i64*> %ptrs, <2 x i1> %m) {
 ; RV32-LABEL: mscatter_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 1, e64,m1,ta,mu
+; RV32-NEXT:    vsetivli a0, 2, e64,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v2i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 1, e64,m1,ta,mu
+; RV64-NEXT:    vsetivli a0, 2, e64,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %val, <2 x i64*> %ptrs, i32 8, <2 x i1> %m)
@@ -770,13 +760,13 @@ declare void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64>, <4 x i64*>, i32, <4 x
 define void @mscatter_v4i64(<4 x i64> %val, <4 x i64*> %ptrs, <4 x i1> %m) {
 ; RV32-LABEL: mscatter_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e64,m2,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v4i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e64,m2,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> %val, <4 x i64*> %ptrs, i32 8, <4 x i1> %m)
@@ -786,13 +776,13 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x i64*> %ptrs, <4 x i1> %m) {
 define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x i64*> %ptrs) {
 ; RV32-LABEL: mscatter_truemask_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e64,m2,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_truemask_v4i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e64,m2,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10
 ; RV64-NEXT:    ret
   %mhead = insertelement <4 x i1> undef, i1 1, i32 0
@@ -818,13 +808,13 @@ declare void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64>, <8 x i64*>, i32, <8 x
 define void @mscatter_v8i64(<8 x i64> %val, <8 x i64*> %ptrs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 4, e64,m4,ta,mu
+; RV32-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 4, e64,m4,ta,mu
+; RV64-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m)
@@ -837,7 +827,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %i
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf4 v26, v12
 ; RV32-NEXT:    vsll.vi v26, v26, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -846,7 +836,6 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %i
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, i64* %base, <8 x i8> %idxs
@@ -860,7 +849,6 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf8 v28, v12
 ; RV32-NEXT:    vsll.vi v28, v28, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -869,7 +857,6 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i8> %idxs to <8 x i64>
@@ -884,7 +871,6 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf8 v28, v12
 ; RV32-NEXT:    vsll.vi v28, v28, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -893,7 +879,6 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vzext.vf8 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i8> %idxs to <8 x i64>
@@ -908,7 +893,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16>
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf2 v26, v12
 ; RV32-NEXT:    vsll.vi v26, v26, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -917,7 +902,6 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16>
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf4 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, i64* %base, <8 x i16> %idxs
@@ -931,7 +915,6 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf4 v28, v12
 ; RV32-NEXT:    vsll.vi v28, v28, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -940,7 +923,6 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf4 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i16> %idxs to <8 x i64>
@@ -955,7 +937,6 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf4 v28, v12
 ; RV32-NEXT:    vsll.vi v28, v28, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -964,7 +945,6 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vzext.vf4 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i16> %idxs to <8 x i64>
@@ -978,7 +958,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32>
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsll.vi v26, v12, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -987,7 +967,6 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32>
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf2 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, i64* %base, <8 x i32> %idxs
@@ -1001,7 +980,6 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf2 v28, v12
 ; RV32-NEXT:    vsll.vi v28, v28, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1010,7 +988,6 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf2 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i32> %idxs to <8 x i64>
@@ -1025,7 +1002,6 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf2 v28, v12
 ; RV32-NEXT:    vsll.vi v28, v28, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1034,7 +1010,6 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vzext.vf2 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i32> %idxs to <8 x i64>
@@ -1048,7 +1023,6 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, i64* %base, <8 x i64> %idxs,
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsll.vi v28, v12, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1056,7 +1030,6 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, i64* %base, <8 x i64> %idxs,
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsll.vi v28, v12, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i64, i64* %base, <8 x i64> %idxs
@@ -1069,7 +1042,7 @@ declare void @llvm.masked.scatter.v1f16.v1p0f16(<1 x half>, <1 x half*>, i32, <1
 define void @mscatter_v1f16(<1 x half> %val, <1 x half*> %ptrs, <1 x i1> %m) {
 ; RV32-LABEL: mscatter_v1f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e16,mf2,ta,mu
+; RV32-NEXT:    vsetivli a0, 1, e16,mf2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1093,7 +1066,7 @@ define void @mscatter_v2f16(<2 x half> %val, <2 x half*> %ptrs, <2 x i1> %m) {
 ;
 ; RV64-LABEL: mscatter_v2f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 1, e16,mf4,ta,mu
+; RV64-NEXT:    vsetivli a0, 2, e16,mf4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half> %val, <2 x half*> %ptrs, i32 2, <2 x i1> %m)
@@ -1105,13 +1078,13 @@ declare void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half>, <4 x half*>, i32, <4
 define void @mscatter_v4f16(<4 x half> %val, <4 x half*> %ptrs, <4 x i1> %m) {
 ; RV32-LABEL: mscatter_v4f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e16,mf2,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v4f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e16,mf2,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> %val, <4 x half*> %ptrs, i32 2, <4 x i1> %m)
@@ -1121,13 +1094,13 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x half*> %ptrs, <4 x i1> %m) {
 define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x half*> %ptrs) {
 ; RV32-LABEL: mscatter_truemask_v4f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e16,mf2,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_truemask_v4f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e16,mf2,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10
 ; RV64-NEXT:    ret
   %mhead = insertelement <4 x i1> undef, i1 1, i32 0
@@ -1153,13 +1126,13 @@ declare void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half>, <8 x half*>, i32, <8
 define void @mscatter_v8f16(<8 x half> %val, <8 x half*> %ptrs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_v8f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 4, e16,m1,ta,mu
+; RV32-NEXT:    vsetivli a0, 8, e16,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v8f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 4, e16,m1,ta,mu
+; RV64-NEXT:    vsetivli a0, 8, e16,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %val, <8 x half*> %ptrs, i32 2, <8 x i1> %m)
@@ -1172,7 +1145,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, half* %base, <8 x i8>
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf4 v26, v9
 ; RV32-NEXT:    vsll.vi v26, v26, 1
-; RV32-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1181,7 +1154,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, half* %base, <8 x i8>
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v9
 ; RV64-NEXT:    vsll.vi v28, v28, 1
-; RV64-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, half* %base, <8 x i8> %idxs
@@ -1195,7 +1168,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, half* %base, <8 x
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf4 v26, v9
 ; RV32-NEXT:    vsll.vi v26, v26, 1
-; RV32-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1204,7 +1177,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, half* %base, <8 x
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v9
 ; RV64-NEXT:    vsll.vi v28, v28, 1
-; RV64-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i8> %idxs to <8 x i16>
@@ -1219,7 +1192,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, half* %base, <8 x
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vzext.vf4 v26, v9
 ; RV32-NEXT:    vsll.vi v26, v26, 1
-; RV32-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1228,7 +1201,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, half* %base, <8 x
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vzext.vf8 v28, v9
 ; RV64-NEXT:    vsll.vi v28, v28, 1
-; RV64-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i8> %idxs to <8 x i16>
@@ -1243,7 +1216,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, half* %base, <8 x i16> %idx
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf2 v26, v9
 ; RV32-NEXT:    vsll.vi v26, v26, 1
-; RV32-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1252,7 +1225,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, half* %base, <8 x i16> %idx
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf4 v28, v9
 ; RV64-NEXT:    vsll.vi v28, v28, 1
-; RV64-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds half, half* %base, <8 x i16> %idxs
@@ -1265,7 +1238,7 @@ declare void @llvm.masked.scatter.v1f32.v1p0f32(<1 x float>, <1 x float*>, i32,
 define void @mscatter_v1f32(<1 x float> %val, <1 x float*> %ptrs, <1 x i1> %m) {
 ; RV32-LABEL: mscatter_v1f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e32,m1,ta,mu
+; RV32-NEXT:    vsetivli a0, 1, e32,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1289,7 +1262,7 @@ define void @mscatter_v2f32(<2 x float> %val, <2 x float*> %ptrs, <2 x i1> %m) {
 ;
 ; RV64-LABEL: mscatter_v2f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 1, e32,mf2,ta,mu
+; RV64-NEXT:    vsetivli a0, 2, e32,mf2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %val, <2 x float*> %ptrs, i32 4, <2 x i1> %m)
@@ -1301,13 +1274,13 @@ declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32,
 define void @mscatter_v4f32(<4 x float> %val, <4 x float*> %ptrs, <4 x i1> %m) {
 ; RV32-LABEL: mscatter_v4f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e32,m1,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v4f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e32,m1,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %val, <4 x float*> %ptrs, i32 4, <4 x i1> %m)
@@ -1317,13 +1290,13 @@ define void @mscatter_v4f32(<4 x float> %val, <4 x float*> %ptrs, <4 x i1> %m) {
 define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x float*> %ptrs) {
 ; RV32-LABEL: mscatter_truemask_v4f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e32,m1,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_truemask_v4f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e32,m1,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10
 ; RV64-NEXT:    ret
   %mhead = insertelement <4 x i1> undef, i1 1, i32 0
@@ -1349,13 +1322,13 @@ declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32,
 define void @mscatter_v8f32(<8 x float> %val, <8 x float*> %ptrs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_v8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 4, e32,m2,ta,mu
+; RV32-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v8f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a0, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %val, <8 x float*> %ptrs, i32 4, <8 x i1> %m)
@@ -1368,7 +1341,6 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, float* %base, <8 x i8
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf4 v26, v10
 ; RV32-NEXT:    vsll.vi v26, v26, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1377,7 +1349,7 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, float* %base, <8 x i8
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, float* %base, <8 x i8> %idxs
@@ -1391,7 +1363,6 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, float* %base, <8
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf4 v26, v10
 ; RV32-NEXT:    vsll.vi v26, v26, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1400,7 +1371,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, float* %base, <8
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i8> %idxs to <8 x i32>
@@ -1415,7 +1386,6 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, float* %base, <8
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vzext.vf4 v26, v10
 ; RV32-NEXT:    vsll.vi v26, v26, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1424,7 +1394,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, float* %base, <8
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vzext.vf8 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i8> %idxs to <8 x i32>
@@ -1439,7 +1409,6 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, float* %base, <8 x i
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf2 v26, v10
 ; RV32-NEXT:    vsll.vi v26, v26, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1448,7 +1417,7 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, float* %base, <8 x i
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf4 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, float* %base, <8 x i16> %idxs
@@ -1462,7 +1431,6 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, float* %base, <
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf2 v26, v10
 ; RV32-NEXT:    vsll.vi v26, v26, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1471,7 +1439,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, float* %base, <
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf4 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i16> %idxs to <8 x i32>
@@ -1486,7 +1454,6 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, float* %base, <
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vzext.vf2 v26, v10
 ; RV32-NEXT:    vsll.vi v26, v26, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1495,7 +1462,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, float* %base, <
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vzext.vf4 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i16> %idxs to <8 x i32>
@@ -1509,7 +1476,6 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, float* %base, <8 x i32> %i
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsll.vi v26, v10, 2
-; RV32-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1518,7 +1484,7 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, float* %base, <8 x i32> %i
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf2 v28, v10
 ; RV64-NEXT:    vsll.vi v28, v28, 2
-; RV64-NEXT:    vsetivli a1, 4, e32,m2,ta,mu
+; RV64-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds float, float* %base, <8 x i32> %idxs
@@ -1549,13 +1515,13 @@ declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32
 define void @mscatter_v2f64(<2 x double> %val, <2 x double*> %ptrs, <2 x i1> %m) {
 ; RV32-LABEL: mscatter_v2f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 1, e64,m1,ta,mu
+; RV32-NEXT:    vsetivli a0, 2, e64,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v2f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 1, e64,m1,ta,mu
+; RV64-NEXT:    vsetivli a0, 2, e64,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 8, <2 x i1> %m)
@@ -1567,13 +1533,13 @@ declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32
 define void @mscatter_v4f64(<4 x double> %val, <4 x double*> %ptrs, <4 x i1> %m) {
 ; RV32-LABEL: mscatter_v4f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e64,m2,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v4f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e64,m2,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %val, <4 x double*> %ptrs, i32 8, <4 x i1> %m)
@@ -1583,13 +1549,13 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x double*> %ptrs, <4 x i1> %m)
 define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x double*> %ptrs) {
 ; RV32-LABEL: mscatter_truemask_v4f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 2, e64,m2,ta,mu
+; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_truemask_v4f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 2, e64,m2,ta,mu
+; RV64-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10
 ; RV64-NEXT:    ret
   %mhead = insertelement <4 x i1> undef, i1 1, i32 0
@@ -1615,13 +1581,13 @@ declare void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double>, <8 x double*>, i32
 define void @mscatter_v8f64(<8 x double> %val, <8 x double*> %ptrs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_v8f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli a0, 4, e64,m4,ta,mu
+; RV32-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v8f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli a0, 4, e64,m4,ta,mu
+; RV64-NEXT:    vsetivli a0, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
 ; RV64-NEXT:    ret
   call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m)
@@ -1634,7 +1600,7 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, double* %base, <8 x
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf4 v26, v12
 ; RV32-NEXT:    vsll.vi v26, v26, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1643,7 +1609,6 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, double* %base, <8 x
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, double* %base, <8 x i8> %idxs
@@ -1657,7 +1622,6 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf8 v28, v12
 ; RV32-NEXT:    vsll.vi v28, v28, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1666,7 +1630,6 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base,
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf8 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i8> %idxs to <8 x i64>
@@ -1681,7 +1644,6 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf8 v28, v12
 ; RV32-NEXT:    vsll.vi v28, v28, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1690,7 +1652,6 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, double* %base,
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vzext.vf8 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i8> %idxs to <8 x i64>
@@ -1705,7 +1666,7 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, double* %base, <8 x
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsext.vf2 v26, v12
 ; RV32-NEXT:    vsll.vi v26, v26, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1714,7 +1675,6 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, double* %base, <8 x
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf4 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, double* %base, <8 x i16> %idxs
@@ -1728,7 +1688,6 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf4 v28, v12
 ; RV32-NEXT:    vsll.vi v28, v28, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1737,7 +1696,6 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base,
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf4 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i16> %idxs to <8 x i64>
@@ -1752,7 +1710,6 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf4 v28, v12
 ; RV32-NEXT:    vsll.vi v28, v28, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1761,7 +1718,6 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, double* %base,
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vzext.vf4 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i16> %idxs to <8 x i64>
@@ -1775,7 +1731,7 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, double* %base, <8 x
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
 ; RV32-NEXT:    vsll.vi v26, v12, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v26, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1784,7 +1740,6 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, double* %base, <8 x
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf2 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, double* %base, <8 x i32> %idxs
@@ -1798,7 +1753,6 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsext.vf2 v28, v12
 ; RV32-NEXT:    vsll.vi v28, v28, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1807,7 +1761,6 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base,
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsext.vf2 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = sext <8 x i32> %idxs to <8 x i64>
@@ -1822,7 +1775,6 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vzext.vf2 v28, v12
 ; RV32-NEXT:    vsll.vi v28, v28, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1831,7 +1783,6 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base,
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vzext.vf2 v28, v12
 ; RV64-NEXT:    vsll.vi v28, v28, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %eidxs = zext <8 x i32> %idxs to <8 x i64>
@@ -1845,7 +1796,6 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64>
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT:    vsll.vi v28, v12, 3
-; RV32-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV32-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1853,7 +1803,6 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64>
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT:    vsll.vi v28, v12, 3
-; RV64-NEXT:    vsetivli a1, 4, e64,m4,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v28, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds double, double* %base, <8 x i64> %idxs
@@ -1868,7 +1817,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a1, 16, e32,m4,ta,mu
 ; RV32-NEXT:    vsext.vf4 v28, v9
-; RV32-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; RV32-NEXT:    vsetivli a1, 16, e8,m1,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1876,7 +1825,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a1, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vsext.vf8 v16, v9
-; RV64-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; RV64-NEXT:    vsetivli a1, 16, e8,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %idxs
@@ -1890,9 +1839,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
 ; RV32-LABEL: mscatter_baseidx_v32i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    addi a1, zero, 32
-; RV32-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
+; RV32-NEXT:    vsetvli a2, a1, e32,m8,ta,mu
 ; RV32-NEXT:    vsext.vf4 v16, v10
-; RV32-NEXT:    vsetivli a1, 16, e8,m2,ta,mu
+; RV32-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -1900,7 +1849,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a1, 16, e64,m8,ta,mu
 ; RV64-NEXT:    vsext.vf8 v16, v10
-; RV64-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; RV64-NEXT:    vsetivli a1, 16, e8,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    vsetivli a1, 16, e8,m2,ta,mu
 ; RV64-NEXT:    vslidedown.vi v26, v8, 16
@@ -1909,7 +1858,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
 ; RV64-NEXT:    vsext.vf8 v8, v28
 ; RV64-NEXT:    vsetivli a1, 2, e8,m1,ta,mu
 ; RV64-NEXT:    vslidedown.vi v0, v0, 2
-; RV64-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; RV64-NEXT:    vsetivli a1, 16, e8,m1,ta,mu
 ; RV64-NEXT:    vsoxei64.v v26, (a0), v8, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs