[llvm] 808a5f1 - [RISCV] Remove `riscv.segN.load/store` in favor of their mask variants (#137045)

via llvm-commits llvm-commits at lists.llvm.org
Thu May 8 09:27:30 PDT 2025


Author: Min-Yih Hsu
Date: 2025-05-08T09:27:26-07:00
New Revision: 808a5f15d7855f78a837c20e866cf4d0b1b90ab2

URL: https://github.com/llvm/llvm-project/commit/808a5f15d7855f78a837c20e866cf4d0b1b90ab2
DIFF: https://github.com/llvm/llvm-project/commit/808a5f15d7855f78a837c20e866cf4d0b1b90ab2.diff

LOG: [RISCV] Remove `riscv.segN.load/store` in favor of their mask variants (#137045)

The RISCVVectorPeephole pass already replaces instructions that have an
all-ones mask with their unmasked variants, so there isn't much point in
keeping separate unmasked versions of these intrinsics.

Note that `riscv.segN.load/store.mask` does not take the pointer type
(i.e. the address space) as part of its overloaded type signature, because
RISC-V doesn't really use address spaces other than the default one.

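For illustration, here is roughly how a factor-2 segment load changes,
taken from the updated fixed-vector tests (the all-ones mask splat is
what used to be the implicit unmasked form):

    ; Before: unmasked intrinsic, pointer type is part of the overload
    %r = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0.i64(ptr %ptr, i64 8)

    ; After: masked intrinsic only, explicit all-ones mask, no pointer type in the overload
    %r = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
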
Added: 
    

Modified: 
    llvm/include/llvm/IR/IntrinsicsRISCV.td
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-store.ll
    llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
    llvm/test/Transforms/InterleavedAccess/RISCV/zve32x.ll
    llvm/test/Transforms/InterleavedAccess/RISCV/zvl32b.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 7da11b93f6b74..18b2883eb00e7 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1704,14 +1704,10 @@ let TargetPrefix = "riscv" in {
   }
 
   // Segment loads/stores for fixed vectors.
+  // Note: we only have the masked variants because RISCVVectorPeephole
+  // would lower any instructions with all-ones mask into unmasked version
+  // anyway.
   foreach nf = [2, 3, 4, 5, 6, 7, 8] in {
-    // Input: (pointer, vl)
-    def int_riscv_seg # nf # _load
-          : DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty],
-                                              !listsplat(LLVMMatchType<0>,
-                                              !add(nf, -1))),
-                                  [llvm_anyptr_ty, llvm_anyint_ty],
-                                  [NoCapture<ArgIndex<0>>, IntrReadMem]>;
     // Input: (pointer, mask, vl)
     def int_riscv_seg # nf # _load_mask
           : DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty],
@@ -1721,15 +1717,7 @@ let TargetPrefix = "riscv" in {
                                    llvm_anyint_ty],
                                   [NoCapture<ArgIndex<0>>, IntrReadMem]>;
 
-    // Input: (<stored values>, pointer, vl)
-    def int_riscv_seg # nf # _store
-          : DefaultAttrsIntrinsic<[],
-                                  !listconcat([llvm_anyvector_ty],
-                                              !listsplat(LLVMMatchType<0>,
-                                                          !add(nf, -1)),
-                                              [llvm_anyptr_ty, llvm_anyint_ty]),
-                                  [NoCapture<ArgIndex<nf>>, IntrWriteMem]>;
-    // Input: (<stored values>, pointer, mask, vl)
+    // Input: (<stored values>..., pointer, mask, vl)
     def int_riscv_seg # nf # _store_mask
           : DefaultAttrsIntrinsic<[],
                                   !listconcat([llvm_anyvector_ty],

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f2bc1765bc4c6..a6fb09bd6d5b2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1745,13 +1745,6 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                  MachineMemOperand::MOVolatile;
     return true;
-  case Intrinsic::riscv_seg2_load:
-  case Intrinsic::riscv_seg3_load:
-  case Intrinsic::riscv_seg4_load:
-  case Intrinsic::riscv_seg5_load:
-  case Intrinsic::riscv_seg6_load:
-  case Intrinsic::riscv_seg7_load:
-  case Intrinsic::riscv_seg8_load:
   case Intrinsic::riscv_seg2_load_mask:
   case Intrinsic::riscv_seg3_load_mask:
   case Intrinsic::riscv_seg4_load_mask:
@@ -1761,17 +1754,6 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::riscv_seg8_load_mask:
     return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
                                /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
-  case Intrinsic::riscv_seg2_store:
-  case Intrinsic::riscv_seg3_store:
-  case Intrinsic::riscv_seg4_store:
-  case Intrinsic::riscv_seg5_store:
-  case Intrinsic::riscv_seg6_store:
-  case Intrinsic::riscv_seg7_store:
-  case Intrinsic::riscv_seg8_store:
-    // Operands are (vec, ..., vec, ptr, vl)
-    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
-                               /*IsStore*/ true,
-                               /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
   case Intrinsic::riscv_seg2_store_mask:
   case Intrinsic::riscv_seg3_store_mask:
   case Intrinsic::riscv_seg4_store_mask:
@@ -10573,13 +10555,6 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
   switch (IntNo) {
   default:
     break;
-  case Intrinsic::riscv_seg2_load:
-  case Intrinsic::riscv_seg3_load:
-  case Intrinsic::riscv_seg4_load:
-  case Intrinsic::riscv_seg5_load:
-  case Intrinsic::riscv_seg6_load:
-  case Intrinsic::riscv_seg7_load:
-  case Intrinsic::riscv_seg8_load:
   case Intrinsic::riscv_seg2_load_mask:
   case Intrinsic::riscv_seg3_load_mask:
   case Intrinsic::riscv_seg4_load_mask:
@@ -10602,18 +10577,13 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                   ContainerVT.getScalarSizeInBits();
     EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
 
-    // Masked: (pointer, mask, vl)
-    // Non-masked: (pointer, vl)
-    bool IsMasked = Op.getNumOperands() > 4;
+    // Operands: (chain, int_id, pointer, mask, vl)
     SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
-    SDValue Mask =
-        IsMasked ? Op.getOperand(3) : getAllOnesMask(ContainerVT, VL, DL, DAG);
+    SDValue Mask = Op.getOperand(3);
     MVT MaskVT = Mask.getSimpleValueType();
-    if (MaskVT.isFixedLengthVector()) {
-      MVT MaskContainerVT =
-          ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
-      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
-    }
+    MVT MaskContainerVT =
+        ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
+    Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
 
     SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
     auto *Load = cast<MemIntrinsicSDNode>(Op);
@@ -10681,13 +10651,6 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
   switch (IntNo) {
   default:
     break;
-  case Intrinsic::riscv_seg2_store:
-  case Intrinsic::riscv_seg3_store:
-  case Intrinsic::riscv_seg4_store:
-  case Intrinsic::riscv_seg5_store:
-  case Intrinsic::riscv_seg6_store:
-  case Intrinsic::riscv_seg7_store:
-  case Intrinsic::riscv_seg8_store:
   case Intrinsic::riscv_seg2_store_mask:
   case Intrinsic::riscv_seg3_store_mask:
   case Intrinsic::riscv_seg4_store_mask:
@@ -10702,24 +10665,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
         Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
         Intrinsic::riscv_vsseg8_mask};
 
-    bool IsMasked = false;
-    switch (IntNo) {
-    case Intrinsic::riscv_seg2_store_mask:
-    case Intrinsic::riscv_seg3_store_mask:
-    case Intrinsic::riscv_seg4_store_mask:
-    case Intrinsic::riscv_seg5_store_mask:
-    case Intrinsic::riscv_seg6_store_mask:
-    case Intrinsic::riscv_seg7_store_mask:
-    case Intrinsic::riscv_seg8_store_mask:
-      IsMasked = true;
-      break;
-    default:
-      break;
-    }
-
-    // Non-masked: (chain, int_id, vec*, ptr, vl)
-    // Masked: (chain, int_id, vec*, ptr, mask, vl)
-    unsigned NF = Op->getNumOperands() - (IsMasked ? 5 : 4);
+    // Operands: (chain, int_id, vec*, ptr, mask, vl)
+    unsigned NF = Op->getNumOperands() - 5;
     assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
     MVT XLenVT = Subtarget.getXLenVT();
     MVT VT = Op->getOperand(2).getSimpleValueType();
@@ -10729,14 +10676,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
     EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
 
     SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
-    SDValue Mask = IsMasked ? Op.getOperand(Op.getNumOperands() - 2)
-                            : getAllOnesMask(ContainerVT, VL, DL, DAG);
+    SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
     MVT MaskVT = Mask.getSimpleValueType();
-    if (MaskVT.isFixedLengthVector()) {
-      MVT MaskContainerVT =
-          ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
-      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
-    }
+    MVT MaskContainerVT =
+        ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
+    Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
 
     SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
     SDValue Ptr = Op->getOperand(NF + 2);
@@ -23781,10 +23725,10 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
 }
 
 static const Intrinsic::ID FixedVlsegIntrIds[] = {
-    Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
-    Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
-    Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
-    Intrinsic::riscv_seg8_load};
+    Intrinsic::riscv_seg2_load_mask, Intrinsic::riscv_seg3_load_mask,
+    Intrinsic::riscv_seg4_load_mask, Intrinsic::riscv_seg5_load_mask,
+    Intrinsic::riscv_seg6_load_mask, Intrinsic::riscv_seg7_load_mask,
+    Intrinsic::riscv_seg8_load_mask};
 
 /// Lower an interleaved load into a vlsegN intrinsic.
 ///
@@ -23835,10 +23779,10 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
   };
 
   Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
-
-  CallInst *VlsegN = Builder.CreateIntrinsic(
-      FixedVlsegIntrIds[Factor - 2], {VTy, LI->getPointerOperandType(), XLenTy},
-      {LI->getPointerOperand(), VL});
+  Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
+  CallInst *VlsegN =
+      Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2], {VTy, XLenTy},
+                              {LI->getPointerOperand(), Mask, VL});
 
   for (unsigned i = 0; i < Shuffles.size(); i++) {
     Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
@@ -23849,10 +23793,10 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
 }
 
 static const Intrinsic::ID FixedVssegIntrIds[] = {
-    Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
-    Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
-    Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
-    Intrinsic::riscv_seg8_store};
+    Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask,
+    Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask,
+    Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
+    Intrinsic::riscv_seg8_store_mask};
 
 /// Lower an interleaved store into a vssegN intrinsic.
 ///
@@ -23912,8 +23856,7 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
   }
 
   Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-      SI->getModule(), FixedVssegIntrIds[Factor - 2],
-      {VTy, SI->getPointerOperandType(), XLenTy});
+      SI->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, XLenTy});
 
   SmallVector<Value *, 10> Ops;
   SmallVector<int, 16> NewShuffleMask;
@@ -23933,7 +23876,8 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
   // potentially under larger LMULs) because we checked that the fixed vector
   // type fits in isLegalInterleavedAccessType
   Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
-  Ops.append({SI->getPointerOperand(), VL});
+  Value *StoreMask = Builder.getAllOnesMask(VTy->getElementCount());
+  Ops.append({SI->getPointerOperand(), StoreMask, VL});
 
   Builder.CreateCall(VssegNFunc, Ops);
 
@@ -23962,10 +23906,10 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
 
   if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
     Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
+    Value *Mask = Builder.getAllOnesMask(FVTy->getElementCount());
     Return =
-        Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
-                                {ResVTy, LI->getPointerOperandType(), XLenTy},
-                                {LI->getPointerOperand(), VL});
+        Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2], {ResVTy, XLenTy},
+                                {LI->getPointerOperand(), Mask, VL});
   } else {
     static const Intrinsic::ID IntrIds[] = {
         Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
@@ -24029,12 +23973,12 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
 
   if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
     Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-        SI->getModule(), FixedVssegIntrIds[Factor - 2],
-        {InVTy, SI->getPointerOperandType(), XLenTy});
+        SI->getModule(), FixedVssegIntrIds[Factor - 2], {InVTy, XLenTy});
 
     SmallVector<Value *, 10> Ops(InterleaveValues);
     Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
-    Ops.append({SI->getPointerOperand(), VL});
+    Value *Mask = Builder.getAllOnesMask(FVTy->getElementCount());
+    Ops.append({SI->getPointerOperand(), Mask, VL});
 
     Builder.CreateCall(VssegNFunc, Ops);
   } else {
@@ -24156,15 +24100,9 @@ bool RISCVTargetLowering::lowerInterleavedVPLoad(
 
   Value *Return = nullptr;
   if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
-    static const Intrinsic::ID FixedMaskedVlsegIntrIds[] = {
-        Intrinsic::riscv_seg2_load_mask, Intrinsic::riscv_seg3_load_mask,
-        Intrinsic::riscv_seg4_load_mask, Intrinsic::riscv_seg5_load_mask,
-        Intrinsic::riscv_seg6_load_mask, Intrinsic::riscv_seg7_load_mask,
-        Intrinsic::riscv_seg8_load_mask};
-
-    Return = Builder.CreateIntrinsic(FixedMaskedVlsegIntrIds[Factor - 2],
-                                     {FVTy, XLenTy},
-                                     {Load->getArgOperand(0), Mask, EVL});
+    Return =
+        Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2], {FVTy, XLenTy},
+                                {Load->getArgOperand(0), Mask, EVL});
   } else {
     static const Intrinsic::ID IntrMaskIds[] = {
         Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
@@ -24276,15 +24214,9 @@ bool RISCVTargetLowering::lowerInterleavedVPStore(
       XLenTy);
 
   if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
-    static const Intrinsic::ID FixedMaskedVssegIntrIds[] = {
-        Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask,
-        Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask,
-        Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
-        Intrinsic::riscv_seg8_store_mask};
-
     SmallVector<Value *, 8> Operands(InterleaveOperands);
     Operands.append({Store->getArgOperand(1), Mask, EVL});
-    Builder.CreateIntrinsic(FixedMaskedVssegIntrIds[Factor - 2], {FVTy, XLenTy},
+    Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2], {FVTy, XLenTy},
                             Operands);
     return true;
   }

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll
index 5ac8a034dcf38..4eed3df0d3f16 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll
@@ -7,7 +7,7 @@ define <8 x i8> @load_factor2(ptr %ptr) {
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlseg2e8.v v7, (a0)
 ; CHECK-NEXT:    ret
-    %1 = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0.i64(ptr %ptr, i64 8)
+    %1 = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0
     %3 = extractvalue { <8 x i8>, <8 x i8> } %1, 1
     ret <8 x i8> %3
@@ -19,7 +19,7 @@ define <8 x i8> @load_factor3(ptr %ptr) {
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlseg3e8.v v6, (a0)
 ; CHECK-NEXT:    ret
-    %1 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0.i64(ptr %ptr, i64 8)
+    %1 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
     %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
     %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
@@ -32,7 +32,7 @@ define <8 x i8> @load_factor4(ptr %ptr) {
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlseg4e8.v v5, (a0)
 ; CHECK-NEXT:    ret
-    %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0.i64(ptr %ptr, i64 8)
+    %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
     %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
     %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
@@ -46,7 +46,7 @@ define <8 x i8> @load_factor5(ptr %ptr) {
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlseg5e8.v v4, (a0)
 ; CHECK-NEXT:    ret
-    %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0.i64(ptr %ptr, i64 8)
+    %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
     %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
     %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
@@ -61,7 +61,7 @@ define <8 x i8> @load_factor6(ptr %ptr) {
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlseg6e8.v v3, (a0)
 ; CHECK-NEXT:    ret
-    %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0.i64(ptr %ptr, i64 8)
+    %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
     %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
     %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
@@ -77,7 +77,7 @@ define <8 x i8> @load_factor7(ptr %ptr) {
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlseg7e8.v v2, (a0)
 ; CHECK-NEXT:    ret
-    %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0.i64(ptr %ptr, i64 8)
+    %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
     %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
     %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
@@ -94,7 +94,7 @@ define <8 x i8> @load_factor8(ptr %ptr) {
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlseg8e8.v v1, (a0)
 ; CHECK-NEXT:    ret
-    %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0.i64(ptr %ptr, i64 8)
+    %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
     %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
     %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
@@ -105,10 +105,3 @@ define <8 x i8> @load_factor8(ptr %ptr) {
     %9 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 7
     ret <8 x i8> %9
 }
-declare { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0.i64(ptr, i64)
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0.i64(ptr, i64)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0.i64(ptr, i64)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0.i64(ptr, i64)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0.i64(ptr, i64)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0.i64(ptr, i64)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0.i64(ptr, i64)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-store.ll
index 56b01c03d9511..46b9beb6344bb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-store.ll
@@ -1,80 +1,72 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s |llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
-declare void @llvm.riscv.seg2.store.v8i8.p0.iXLen(<8 x i8>, <8 x i8>, ptr, iXLen)
 define void @store_factor2(<8 x i8> %v0, <8 x i8> %v1, ptr %ptr) {
 ; CHECK-LABEL: store_factor2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vsseg2e8.v v8, (a0)
 ; CHECK-NEXT:    ret
-    call void @llvm.riscv.seg2.store.v8i8.p0.iXLen(<8 x i8> %v0, <8 x i8> %v1, ptr %ptr, iXLen 8)
+    call void @llvm.riscv.seg2.store.mask.v8i8.i64(<8 x i8> %v0, <8 x i8> %v1, ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     ret void
 }
 
-declare void @llvm.riscv.seg3.store.v8i8.p0.iXLen(<8 x i8>, <8 x i8>, <8 x i8>, ptr, iXLen)
 define void @store_factor3(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, ptr %ptr) {
 ; CHECK-LABEL: store_factor3:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vsseg3e8.v v8, (a0)
 ; CHECK-NEXT:    ret
-    call void @llvm.riscv.seg3.store.v8i8.p0.iXLen(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, ptr %ptr, iXLen 8)
+    call void @llvm.riscv.seg3.store.mask.v8i8.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     ret void
 }
 
-declare void @llvm.riscv.seg4.store.v8i8.p0.iXLen(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr, iXLen)
 define void @store_factor4(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, ptr %ptr) {
 ; CHECK-LABEL: store_factor4:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vsseg4e8.v v8, (a0)
 ; CHECK-NEXT:    ret
-    call void @llvm.riscv.seg4.store.v8i8.p0.iXLen(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, ptr %ptr, iXLen 8)
+    call void @llvm.riscv.seg4.store.mask.v8i8.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     ret void
 }
 
-declare void @llvm.riscv.seg5.store.v8i8.p0.iXLen(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr, iXLen)
 define void @store_factor5(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, ptr %ptr) {
 ; CHECK-LABEL: store_factor5:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vsseg5e8.v v8, (a0)
 ; CHECK-NEXT:    ret
-    call void @llvm.riscv.seg5.store.v8i8.p0.iXLen(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, ptr %ptr, iXLen 8)
+    call void @llvm.riscv.seg5.store.mask.v8i8.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     ret void
 }
 
-declare void @llvm.riscv.seg6.store.v8i8.p0.iXLen(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr, iXLen)
 define void @store_factor6(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, ptr %ptr) {
 ; CHECK-LABEL: store_factor6:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vsseg6e8.v v8, (a0)
 ; CHECK-NEXT:    ret
-    call void @llvm.riscv.seg6.store.v8i8.p0.iXLen(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, ptr %ptr, iXLen 8)
+    call void @llvm.riscv.seg6.store.mask.v8i8.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     ret void
 }
 
-declare void @llvm.riscv.seg7.store.v8i8.p0.iXLen(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8x i8>, ptr, iXLen)
 define void @store_factor7(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, ptr %ptr) {
 ; CHECK-LABEL: store_factor7:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vsseg7e8.v v8, (a0)
 ; CHECK-NEXT:    ret
-    call void @llvm.riscv.seg7.store.v8i8.p0.iXLen(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, ptr %ptr, iXLen 8)
+    call void @llvm.riscv.seg7.store.mask.v8i8.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     ret void
 }
 
-declare void @llvm.riscv.seg8.store.v8i8.p0.iXLen(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr, iXLen)
 define void @store_factor8(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7, ptr %ptr) {
 ; CHECK-LABEL: store_factor8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vsseg8e8.v v8, (a0)
 ; CHECK-NEXT:    ret
-    call void @llvm.riscv.seg8.store.v8i8.p0.iXLen(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7, ptr %ptr, iXLen 8)
+    call void @llvm.riscv.seg8.store.mask.v8i8.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7, ptr %ptr, <8 x i1> splat (i1 true), i64 8)
     ret void
 }

diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
index 88d5461083541..f2e2950992421 100644
--- a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
+++ b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
@@ -6,13 +6,13 @@
 
 define void @load_factor2(ptr %ptr) {
 ; RV32-LABEL: @load_factor2(
-; RV32-NEXT:    [[TMP1:%.*]] = call { <8 x i32>, <8 x i32> } @llvm.riscv.seg2.load.v8i32.p0.i32(ptr [[PTR:%.*]], i32 8)
+; RV32-NEXT:    [[TMP1:%.*]] = call { <8 x i32>, <8 x i32> } @llvm.riscv.seg2.load.mask.v8i32.i32(ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i32 8)
 ; RV32-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i32>, <8 x i32> } [[TMP1]], 1
 ; RV32-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i32>, <8 x i32> } [[TMP1]], 0
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @load_factor2(
-; RV64-NEXT:    [[TMP1:%.*]] = call { <8 x i32>, <8 x i32> } @llvm.riscv.seg2.load.v8i32.p0.i64(ptr [[PTR:%.*]], i64 8)
+; RV64-NEXT:    [[TMP1:%.*]] = call { <8 x i32>, <8 x i32> } @llvm.riscv.seg2.load.mask.v8i32.i64(ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i64 8)
 ; RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i32>, <8 x i32> } [[TMP1]], 1
 ; RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i32>, <8 x i32> } [[TMP1]], 0
 ; RV64-NEXT:    ret void
@@ -23,25 +23,6 @@ define void @load_factor2(ptr %ptr) {
   ret void
 }
 
-define void @load_factor2_as(ptr addrspace(1) %ptr) {
-; RV32-LABEL: @load_factor2_as(
-; RV32-NEXT:    [[TMP1:%.*]] = call { <8 x i32>, <8 x i32> } @llvm.riscv.seg2.load.v8i32.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 8)
-; RV32-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i32>, <8 x i32> } [[TMP1]], 1
-; RV32-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i32>, <8 x i32> } [[TMP1]], 0
-; RV32-NEXT:    ret void
-;
-; RV64-LABEL: @load_factor2_as(
-; RV64-NEXT:    [[TMP1:%.*]] = call { <8 x i32>, <8 x i32> } @llvm.riscv.seg2.load.v8i32.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 8)
-; RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i32>, <8 x i32> } [[TMP1]], 1
-; RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i32>, <8 x i32> } [[TMP1]], 0
-; RV64-NEXT:    ret void
-;
-  %interleaved.vec = load <16 x i32>, ptr addrspace(1) %ptr
-  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-  ret void
-}
-
 define void @load_factor2_vscale(ptr %ptr) {
 ; RV32-LABEL: @load_factor2_vscale(
 ; RV32-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t.i32(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) poison, ptr [[PTR:%.*]], i32 -1, i32 5)
@@ -70,38 +51,16 @@ define void @load_factor2_vscale(ptr %ptr) {
   ret void
 }
 
-define void @load_factor2_vscale_as(ptr addrspace(1) %ptr) {
-; RV32-LABEL: @load_factor2_vscale_as(
-; RV32-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <vscale x 16 x i32>, ptr addrspace(1) [[PTR:%.*]], align 64
-; RV32-NEXT:    [[V:%.*]] = call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> [[INTERLEAVED_VEC]])
-; RV32-NEXT:    [[T0:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } [[V]], 0
-; RV32-NEXT:    [[T1:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } [[V]], 1
-; RV32-NEXT:    ret void
-;
-; RV64-LABEL: @load_factor2_vscale_as(
-; RV64-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <vscale x 16 x i32>, ptr addrspace(1) [[PTR:%.*]], align 64
-; RV64-NEXT:    [[V:%.*]] = call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> [[INTERLEAVED_VEC]])
-; RV64-NEXT:    [[T0:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } [[V]], 0
-; RV64-NEXT:    [[T1:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } [[V]], 1
-; RV64-NEXT:    ret void
-;
-  %interleaved.vec = load <vscale x 16 x i32>, ptr addrspace(1) %ptr
-  %v = call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %interleaved.vec)
-  %t0 = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %v, 0
-  %t1 = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %v, 1
-  ret void
-}
-
 define void @load_factor3(ptr %ptr) {
 ; RV32-LABEL: @load_factor3(
-; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg3.load.v4i32.p0.i32(ptr [[PTR:%.*]], i32 4)
+; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg3.load.mask.v4i32.i32(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4)
 ; RV32-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 2
 ; RV32-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 1
 ; RV32-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 0
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @load_factor3(
-; RV64-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg3.load.v4i32.p0.i64(ptr [[PTR:%.*]], i64 4)
+; RV64-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg3.load.mask.v4i32.i64(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i64 4)
 ; RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 2
 ; RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 1
 ; RV64-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 0
@@ -116,7 +75,7 @@ define void @load_factor3(ptr %ptr) {
 
 define void @load_factor4(ptr %ptr) {
 ; RV32-LABEL: @load_factor4(
-; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg4.load.v4i32.p0.i32(ptr [[PTR:%.*]], i32 4)
+; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg4.load.mask.v4i32.i32(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4)
 ; RV32-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 3
 ; RV32-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 2
 ; RV32-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 1
@@ -124,7 +83,7 @@ define void @load_factor4(ptr %ptr) {
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @load_factor4(
-; RV64-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg4.load.v4i32.p0.i64(ptr [[PTR:%.*]], i64 4)
+; RV64-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg4.load.mask.v4i32.i64(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i64 4)
 ; RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 3
 ; RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 2
 ; RV64-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 1
@@ -187,7 +146,7 @@ define void @load_factor4_vscale(ptr %ptr) {
 
 define void @load_factor5(ptr %ptr) {
 ; RV32-LABEL: @load_factor5(
-; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg5.load.v4i32.p0.i32(ptr [[PTR:%.*]], i32 4)
+; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg5.load.mask.v4i32.i32(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4)
 ; RV32-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 4
 ; RV32-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 3
 ; RV32-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 2
@@ -196,7 +155,7 @@ define void @load_factor5(ptr %ptr) {
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @load_factor5(
-; RV64-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg5.load.v4i32.p0.i64(ptr [[PTR:%.*]], i64 4)
+; RV64-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg5.load.mask.v4i32.i64(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i64 4)
 ; RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 4
 ; RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 3
 ; RV64-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 2
@@ -215,7 +174,7 @@ define void @load_factor5(ptr %ptr) {
 
 define void @load_factor6(ptr %ptr) {
 ; RV32-LABEL: @load_factor6(
-; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg6.load.v4i32.p0.i32(ptr [[PTR:%.*]], i32 4)
+; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg6.load.mask.v4i32.i32(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4)
 ; RV32-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 5
 ; RV32-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 4
 ; RV32-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 3
@@ -225,7 +184,7 @@ define void @load_factor6(ptr %ptr) {
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @load_factor6(
-; RV64-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg6.load.v4i32.p0.i64(ptr [[PTR:%.*]], i64 4)
+; RV64-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg6.load.mask.v4i32.i64(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i64 4)
 ; RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 5
 ; RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 4
 ; RV64-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 3
@@ -246,7 +205,7 @@ define void @load_factor6(ptr %ptr) {
 
 define void @load_factor7(ptr %ptr) {
 ; RV32-LABEL: @load_factor7(
-; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg7.load.v4i32.p0.i32(ptr [[PTR:%.*]], i32 4)
+; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg7.load.mask.v4i32.i32(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4)
 ; RV32-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 6
 ; RV32-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 5
 ; RV32-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 4
@@ -257,7 +216,7 @@ define void @load_factor7(ptr %ptr) {
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @load_factor7(
-; RV64-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg7.load.v4i32.p0.i64(ptr [[PTR:%.*]], i64 4)
+; RV64-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg7.load.mask.v4i32.i64(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i64 4)
 ; RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 6
 ; RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 5
 ; RV64-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 4
@@ -280,7 +239,7 @@ define void @load_factor7(ptr %ptr) {
 
 define void @load_factor8(ptr %ptr) {
 ; RV32-LABEL: @load_factor8(
-; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg8.load.v4i32.p0.i32(ptr [[PTR:%.*]], i32 4)
+; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg8.load.mask.v4i32.i32(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4)
 ; RV32-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 7
 ; RV32-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 6
 ; RV32-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 5
@@ -292,7 +251,7 @@ define void @load_factor8(ptr %ptr) {
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @load_factor8(
-; RV64-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg8.load.v4i32.p0.i64(ptr [[PTR:%.*]], i64 4)
+; RV64-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg8.load.mask.v4i32.i64(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i64 4)
 ; RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 7
 ; RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 6
 ; RV64-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP1]], 5
@@ -404,13 +363,13 @@ define void @store_factor2(ptr %ptr, <8 x i8> %v0, <8 x i8> %v1) {
 ; RV32-LABEL: @store_factor2(
 ; RV32-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; RV32-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i8> [[V0]], <8 x i8> [[V1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; RV32-NEXT:    call void @llvm.riscv.seg2.store.v8i8.p0.i32(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], ptr [[PTR:%.*]], i32 8)
+; RV32-NEXT:    call void @llvm.riscv.seg2.store.mask.v8i8.i32(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i32 8)
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @store_factor2(
 ; RV64-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; RV64-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i8> [[V0]], <8 x i8> [[V1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; RV64-NEXT:    call void @llvm.riscv.seg2.store.v8i8.p0.i64(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], ptr [[PTR:%.*]], i64 8)
+; RV64-NEXT:    call void @llvm.riscv.seg2.store.mask.v8i8.i64(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i64 8)
 ; RV64-NEXT:    ret void
 ;
   %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -418,24 +377,6 @@ define void @store_factor2(ptr %ptr, <8 x i8> %v0, <8 x i8> %v1) {
   ret void
 }
 
-define void @store_factor2_as(ptr addrspace(1) %ptr, <8 x i8> %v0, <8 x i8> %v1) {
-; RV32-LABEL: @store_factor2_as(
-; RV32-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; RV32-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i8> [[V0]], <8 x i8> [[V1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; RV32-NEXT:    call void @llvm.riscv.seg2.store.v8i8.p1.i32(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], ptr addrspace(1) [[PTR:%.*]], i32 8)
-; RV32-NEXT:    ret void
-;
-; RV64-LABEL: @store_factor2_as(
-; RV64-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; RV64-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i8> [[V0]], <8 x i8> [[V1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; RV64-NEXT:    call void @llvm.riscv.seg2.store.v8i8.p1.i64(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], ptr addrspace(1) [[PTR:%.*]], i64 8)
-; RV64-NEXT:    ret void
-;
-  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-  store <16 x i8> %interleaved.vec, ptr addrspace(1) %ptr, align 4
-  ret void
-}
-
 define void @store_factor2_vscale(ptr %ptr, <vscale x 8 x i8> %v0, <vscale x 8 x i8> %v1) {
 ; RV32-LABEL: @store_factor2_vscale(
 ; RV32-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) poison, <vscale x 8 x i8> [[V0:%.*]], i32 0)
@@ -454,22 +395,6 @@ define void @store_factor2_vscale(ptr %ptr, <vscale x 8 x i8> %v0, <vscale x 8 x
   ret void
 }
 
-define void @store_factor2_vscale_as(ptr addrspace(1) %ptr, <vscale x 8 x i8> %v0, <vscale x 8 x i8> %v1) {
-; RV32-LABEL: @store_factor2_vscale_as(
-; RV32-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 16 x i8> @llvm.vector.interleave2.nxv16i8(<vscale x 8 x i8> [[V0:%.*]], <vscale x 8 x i8> [[V1:%.*]])
-; RV32-NEXT:    store <vscale x 16 x i8> [[INTERLEAVED_VEC]], ptr addrspace(1) [[PTR:%.*]], align 4
-; RV32-NEXT:    ret void
-;
-; RV64-LABEL: @store_factor2_vscale_as(
-; RV64-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 16 x i8> @llvm.vector.interleave2.nxv16i8(<vscale x 8 x i8> [[V0:%.*]], <vscale x 8 x i8> [[V1:%.*]])
-; RV64-NEXT:    store <vscale x 16 x i8> [[INTERLEAVED_VEC]], ptr addrspace(1) [[PTR:%.*]], align 4
-; RV64-NEXT:    ret void
-;
-  %interleaved.vec = call <vscale x 16 x i8> @llvm.vector.interleave2.nxv8i8(<vscale x 8 x i8> %v0, <vscale x 8 x i8> %v1)
-  store <vscale x 16 x i8> %interleaved.vec, ptr addrspace(1) %ptr, align 4
-  ret void
-}
-
 define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
 ; RV32-LABEL: @store_factor3(
 ; RV32-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -477,7 +402,7 @@ define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2
 ; RV32-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; RV32-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; RV32-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
-; RV32-NEXT:    call void @llvm.riscv.seg3.store.v4i32.p0.i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr [[PTR:%.*]], i32 4)
+; RV32-NEXT:    call void @llvm.riscv.seg3.store.mask.v4i32.i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4)
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @store_factor3(
@@ -486,7 +411,7 @@ define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2
 ; RV64-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; RV64-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; RV64-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
-; RV64-NEXT:    call void @llvm.riscv.seg3.store.v4i32.p0.i64(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr [[PTR:%.*]], i64 4)
+; RV64-NEXT:    call void @llvm.riscv.seg3.store.mask.v4i32.i64(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i64 4)
 ; RV64-NEXT:    ret void
 ;
   %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -504,7 +429,7 @@ define void @store_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2
 ; RV32-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; RV32-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
 ; RV32-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
-; RV32-NEXT:    call void @llvm.riscv.seg4.store.v4i32.p0.i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr [[PTR:%.*]], i32 4)
+; RV32-NEXT:    call void @llvm.riscv.seg4.store.mask.v4i32.i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4)
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @store_factor4(
@@ -514,7 +439,7 @@ define void @store_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2
 ; RV64-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; RV64-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
 ; RV64-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
-; RV64-NEXT:    call void @llvm.riscv.seg4.store.v4i32.p0.i64(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr [[PTR:%.*]], i64 4)
+; RV64-NEXT:    call void @llvm.riscv.seg4.store.mask.v4i32.i64(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i64 4)
 ; RV64-NEXT:    ret void
 ;
   %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -553,13 +478,13 @@ define void @store_factor2_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1) {
 ; RV32-LABEL: @store_factor2_wide(
 ; RV32-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; RV32-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; RV32-NEXT:    call void @llvm.riscv.seg2.store.v8i32.p0.i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], ptr [[PTR:%.*]], i32 8)
+; RV32-NEXT:    call void @llvm.riscv.seg2.store.mask.v8i32.i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i32 8)
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @store_factor2_wide(
 ; RV64-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; RV64-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; RV64-NEXT:    call void @llvm.riscv.seg2.store.v8i32.p0.i64(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], ptr [[PTR:%.*]], i64 8)
+; RV64-NEXT:    call void @llvm.riscv.seg2.store.mask.v8i32.i64(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i64 8)
 ; RV64-NEXT:    ret void
 ;
   %interleaved.vec = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -574,7 +499,7 @@ define void @store_factor3_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32
 ; RV32-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; RV32-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; RV32-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; RV32-NEXT:    call void @llvm.riscv.seg3.store.v8i32.p0.i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], ptr [[PTR:%.*]], i32 8)
+; RV32-NEXT:    call void @llvm.riscv.seg3.store.mask.v8i32.i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i32 8)
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @store_factor3_wide(
@@ -583,7 +508,7 @@ define void @store_factor3_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32
 ; RV64-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; RV64-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; RV64-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; RV64-NEXT:    call void @llvm.riscv.seg3.store.v8i32.p0.i64(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], ptr [[PTR:%.*]], i64 8)
+; RV64-NEXT:    call void @llvm.riscv.seg3.store.mask.v8i32.i64(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i64 8)
 ; RV64-NEXT:    ret void
 ;
   %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -601,7 +526,7 @@ define void @store_factor4_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32
 ; RV32-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; RV32-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 ; RV32-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; RV32-NEXT:    call void @llvm.riscv.seg4.store.v8i32.p0.i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], ptr [[PTR:%.*]], i32 8)
+; RV32-NEXT:    call void @llvm.riscv.seg4.store.mask.v8i32.i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i32 8)
 ; RV32-NEXT:    ret void
 ;
 ; RV64-LABEL: @store_factor4_wide(
@@ -611,7 +536,7 @@ define void @store_factor4_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32
 ; RV64-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; RV64-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 ; RV64-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; RV64-NEXT:    call void @llvm.riscv.seg4.store.v8i32.p0.i64(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], ptr [[PTR:%.*]], i64 8)
+; RV64-NEXT:    call void @llvm.riscv.seg4.store.mask.v8i32.i64(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i64 8)
 ; RV64-NEXT:    ret void
 ;
   %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/zve32x.ll b/llvm/test/Transforms/InterleavedAccess/RISCV/zve32x.ll
index ac3cab6638b8c..fcd174a0dfe4b 100644
--- a/llvm/test/Transforms/InterleavedAccess/RISCV/zve32x.ll
+++ b/llvm/test/Transforms/InterleavedAccess/RISCV/zve32x.ll
@@ -13,7 +13,7 @@ define <4 x i1> @load_large_vector(ptr %p) {
 ; ZVE32X-NEXT:    ret <4 x i1> [[RET]]
 ;
 ; ZVE64X-LABEL: @load_large_vector(
-; ZVE64X-NEXT:    [[TMP1:%.*]] = call { <4 x ptr>, <4 x ptr>, <4 x ptr> } @llvm.riscv.seg3.load.v4p0.p0.i64(ptr [[P:%.*]], i64 4)
+; ZVE64X-NEXT:    [[TMP1:%.*]] = call { <4 x ptr>, <4 x ptr>, <4 x ptr> } @llvm.riscv.seg3.load.mask.v4p0.i64(ptr [[P:%.*]], <4 x i1> splat (i1 true), i64 4)
 ; ZVE64X-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x ptr>, <4 x ptr>, <4 x ptr> } [[TMP1]], 1
 ; ZVE64X-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x ptr>, <4 x ptr>, <4 x ptr> } [[TMP1]], 0
 ; ZVE64X-NEXT:    [[RET:%.*]] = icmp ne <4 x ptr> [[TMP3]], [[TMP2]]

diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/zvl32b.ll b/llvm/test/Transforms/InterleavedAccess/RISCV/zvl32b.ll
index a94e6a70e79e6..3180b698c7e2b 100644
--- a/llvm/test/Transforms/InterleavedAccess/RISCV/zvl32b.ll
+++ b/llvm/test/Transforms/InterleavedAccess/RISCV/zvl32b.ll
@@ -16,7 +16,7 @@ define {<16 x i32>, <16 x i32>} @load_factor2_large(ptr %ptr) {
 ; ZVL32B-NEXT:    ret { <16 x i32>, <16 x i32> } [[RES1]]
 ;
 ; ZVL128B-LABEL: @load_factor2_large(
-; ZVL128B-NEXT:    [[TMP1:%.*]] = call { <16 x i32>, <16 x i32> } @llvm.riscv.seg2.load.v16i32.p0.i32(ptr [[PTR:%.*]], i32 16)
+; ZVL128B-NEXT:    [[TMP1:%.*]] = call { <16 x i32>, <16 x i32> } @llvm.riscv.seg2.load.mask.v16i32.i32(ptr [[PTR:%.*]], <16 x i1> splat (i1 true), i32 16)
 ; ZVL128B-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i32>, <16 x i32> } [[TMP1]], 1
 ; ZVL128B-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i32>, <16 x i32> } [[TMP1]], 0
 ; ZVL128B-NEXT:    [[RES0:%.*]] = insertvalue { <16 x i32>, <16 x i32> } undef, <16 x i32> [[TMP3]], 0


        

