[clang] 17a68c6 - [SveEmitter] Implement builtins for contiguous loads/stores

Sander de Smalen via cfe-commits cfe-commits at lists.llvm.org
Tue Apr 14 07:26:29 PDT 2020


Author: Sander de Smalen
Date: 2020-04-14T15:24:57+01:00
New Revision: 17a68c61a952032a8b4363b542606dc0b6b5dbd5

URL: https://github.com/llvm/llvm-project/commit/17a68c61a952032a8b4363b542606dc0b6b5dbd5
DIFF: https://github.com/llvm/llvm-project/commit/17a68c61a952032a8b4363b542606dc0b6b5dbd5.diff

LOG: [SveEmitter] Implement builtins for contiguous loads/stores

This adds builtins for all contiguous loads/stores, including
non-temporal, first-faulting and non-faulting.

Reviewers: efriedma, SjoerdMeijer

Reviewed By: SjoerdMeijer

Tags: #clang

Differential Revision: https://reviews.llvm.org/D76238

Added: 
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sh.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sw.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1ub.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uh.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uw.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c

Modified: 
    clang/include/clang/Basic/TargetBuiltins.h
    clang/include/clang/Basic/arm_sve.td
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/CodeGen/CodeGenFunction.h
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
    clang/utils/TableGen/SveEmitter.cpp

Removed: 
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1_shortform.c


################################################################################
diff  --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index fd339ef34e1e..99af2412426f 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -193,6 +193,12 @@ namespace clang {
     }
 
     bool isLoad() const { return Flags & IsLoad; }
+    bool isStore() const { return Flags & IsStore; }
+    bool isGatherLoad() const { return Flags & IsGatherLoad; }
+    bool isScatterStore() const { return Flags & IsScatterStore; }
+    bool isStructLoad() const { return Flags & IsStructLoad; }
+    bool isStructStore() const { return Flags & IsStructStore; }
+    bool isZExtReturn() const { return Flags & IsZExtReturn; }
 
     uint64_t getBits() const { return Flags; }
     bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }

diff  --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 012f639527c7..4e75d0339b5c 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -62,6 +62,28 @@
 // c: const pointer type
 // P: predicate type
 
+// l: int64_t
+
+// A: pointer to int8_t
+// B: pointer to int16_t
+// C: pointer to int32_t
+// D: pointer to int64_t
+
+// E: pointer to uint8_t
+// F: pointer to uint16_t
+// G: pointer to uint32_t
+// H: pointer to uint64_t
+
+// S: const pointer to int8_t
+// T: const pointer to int16_t
+// U: const pointer to int32_t
+// V: const pointer to int64_t
+//
+// W: const pointer to uint8_t
+// X: const pointer to uint16_t
+// Y: const pointer to uint32_t
+// Z: const pointer to uint64_t
+
 class MergeType<int val> {
   int Value = val;
 }
@@ -113,6 +135,12 @@ def FirstMemEltType           : FlagType<0x00000010>;
 //      :                                     :
 def MemEltTypeMask            : FlagType<0x00000070>;
 def IsLoad                    : FlagType<0x00000080>;
+def IsStore                   : FlagType<0x00000100>;
+def IsGatherLoad              : FlagType<0x00000200>;
+def IsScatterStore            : FlagType<0x00000400>;
+def IsStructLoad              : FlagType<0x00000800>;
+def IsStructStore             : FlagType<0x00001000>;
+def IsZExtReturn              : FlagType<0x00002000>; // Return value is sign-extend by default
 
 // Every intrinsic subclasses Inst.
 class Inst<string n, string p, string t, MergeType mt, string i,
@@ -137,3 +165,88 @@ class MInst<string n, string p, string t, list<FlagType> f,
 
 // Load one vector (scalar base)
 def SVLD1   : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad]>;
+def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl",       [IsLoad],               MemEltTyInt8>;
+def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl",       [IsLoad, IsZExtReturn], MemEltTyInt8>;
+def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl",          [IsLoad],               MemEltTyInt16>;
+def SVLD1UH : MInst<"svld1uh_{d}", "dPX", "ilUiUl",          [IsLoad, IsZExtReturn], MemEltTyInt16>;
+def SVLD1SW : MInst<"svld1sw_{d}", "dPU", "lUl",             [IsLoad],               MemEltTyInt32>;
+def SVLD1UW : MInst<"svld1uw_{d}", "dPY", "lUl",             [IsLoad, IsZExtReturn], MemEltTyInt32>;
+
+// Load one vector (scalar base, VL displacement)
+def SVLD1_VNUM   : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad]>;
+def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl",       [IsLoad],               MemEltTyInt8>;
+def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl",       [IsLoad, IsZExtReturn], MemEltTyInt8>;
+def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl",          [IsLoad],               MemEltTyInt16>;
+def SVLD1UH_VNUM : MInst<"svld1uh_vnum_{d}", "dPXl", "ilUiUl",          [IsLoad, IsZExtReturn], MemEltTyInt16>;
+def SVLD1SW_VNUM : MInst<"svld1sw_vnum_{d}", "dPUl", "lUl",             [IsLoad],               MemEltTyInt32>;
+def SVLD1UW_VNUM : MInst<"svld1uw_vnum_{d}", "dPYl", "lUl",             [IsLoad, IsZExtReturn], MemEltTyInt32>;
+
+// First-faulting load one vector (scalar base)
+def SVLDFF1   : MInst<"svldff1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad],               MemEltTyDefault, "aarch64_sve_ldff1">;
+def SVLDFF1SB : MInst<"svldff1sb_{d}", "dPS", "silUsUiUl",       [IsLoad],               MemEltTyInt8,    "aarch64_sve_ldff1">;
+def SVLDFF1UB : MInst<"svldff1ub_{d}", "dPW", "silUsUiUl",       [IsLoad, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldff1">;
+def SVLDFF1SH : MInst<"svldff1sh_{d}", "dPT", "ilUiUl",          [IsLoad],               MemEltTyInt16,   "aarch64_sve_ldff1">;
+def SVLDFF1UH : MInst<"svldff1uh_{d}", "dPX", "ilUiUl",          [IsLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1">;
+def SVLDFF1SW : MInst<"svldff1sw_{d}", "dPU", "lUl",             [IsLoad],               MemEltTyInt32,   "aarch64_sve_ldff1">;
+def SVLDFF1UW : MInst<"svldff1uw_{d}", "dPY", "lUl",             [IsLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldff1">;
+
+// First-faulting load one vector (scalar base, VL displacement)
+def SVLDFF1_VNUM   : MInst<"svldff1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad],               MemEltTyDefault, "aarch64_sve_ldff1">;
+def SVLDFF1SB_VNUM : MInst<"svldff1sb_vnum_{d}", "dPSl", "silUsUiUl",       [IsLoad],               MemEltTyInt8,    "aarch64_sve_ldff1">;
+def SVLDFF1UB_VNUM : MInst<"svldff1ub_vnum_{d}", "dPWl", "silUsUiUl",       [IsLoad, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldff1">;
+def SVLDFF1SH_VNUM : MInst<"svldff1sh_vnum_{d}", "dPTl", "ilUiUl",          [IsLoad],               MemEltTyInt16,   "aarch64_sve_ldff1">;
+def SVLDFF1UH_VNUM : MInst<"svldff1uh_vnum_{d}", "dPXl", "ilUiUl",          [IsLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1">;
+def SVLDFF1SW_VNUM : MInst<"svldff1sw_vnum_{d}", "dPUl", "lUl",             [IsLoad],               MemEltTyInt32,   "aarch64_sve_ldff1">;
+def SVLDFF1UW_VNUM : MInst<"svldff1uw_vnum_{d}", "dPYl", "lUl",             [IsLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldff1">;
+
+// Non-faulting load one vector (scalar base)
+def SVLDNF1   : MInst<"svldnf1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad],               MemEltTyDefault, "aarch64_sve_ldnf1">;
+def SVLDNF1SB : MInst<"svldnf1sb_{d}", "dPS", "silUsUiUl",       [IsLoad],               MemEltTyInt8,    "aarch64_sve_ldnf1">;
+def SVLDNF1UB : MInst<"svldnf1ub_{d}", "dPW", "silUsUiUl",       [IsLoad, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldnf1">;
+def SVLDNF1SH : MInst<"svldnf1sh_{d}", "dPT", "ilUiUl",          [IsLoad],               MemEltTyInt16,   "aarch64_sve_ldnf1">;
+def SVLDNF1UH : MInst<"svldnf1uh_{d}", "dPX", "ilUiUl",          [IsLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldnf1">;
+def SVLDNF1SW : MInst<"svldnf1sw_{d}", "dPU", "lUl",             [IsLoad],               MemEltTyInt32,   "aarch64_sve_ldnf1">;
+def SVLDNF1UW : MInst<"svldnf1uw_{d}", "dPY", "lUl",             [IsLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldnf1">;
+
+// Non-faulting load one vector (scalar base, VL displacement)
+def SVLDNF1_VNUM   : MInst<"svldnf1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad],               MemEltTyDefault, "aarch64_sve_ldnf1">;
+def SVLDNF1SB_VNUM : MInst<"svldnf1sb_vnum_{d}", "dPSl", "silUsUiUl",       [IsLoad],               MemEltTyInt8,    "aarch64_sve_ldnf1">;
+def SVLDNF1UB_VNUM : MInst<"svldnf1ub_vnum_{d}", "dPWl", "silUsUiUl",       [IsLoad, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldnf1">;
+def SVLDNF1SH_VNUM : MInst<"svldnf1sh_vnum_{d}", "dPTl", "ilUiUl",          [IsLoad],               MemEltTyInt16,   "aarch64_sve_ldnf1">;
+def SVLDNF1UH_VNUM : MInst<"svldnf1uh_vnum_{d}", "dPXl", "ilUiUl",          [IsLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldnf1">;
+def SVLDNF1SW_VNUM : MInst<"svldnf1sw_vnum_{d}", "dPUl", "lUl",             [IsLoad],               MemEltTyInt32,   "aarch64_sve_ldnf1">;
+def SVLDNF1UW_VNUM : MInst<"svldnf1uw_vnum_{d}", "dPYl", "lUl",             [IsLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldnf1">;
+
+// Load one vector, unextended load, non-temporal (scalar base)
+def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">;
+
+// Load one vector, unextended load, non-temporal (scalar base, VL displacement)
+def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">;
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Stores
+
+// Store one vector (scalar base)
+def SVST1    : MInst<"svst1[_{d}]",  "vPpd", "csilUcUsUiUlhfd", [IsStore]>;
+def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil",             [IsStore], MemEltTyInt8>;
+def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl",          [IsStore], MemEltTyInt8>;
+def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il",              [IsStore], MemEltTyInt16>;
+def SVST1H_U : MInst<"svst1h[_{d}]", "vPFd", "UiUl",            [IsStore], MemEltTyInt16>;
+def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l",               [IsStore], MemEltTyInt32>;
+def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul",              [IsStore], MemEltTyInt32>;
+
+// Store one vector (scalar base, VL displacement)
+def SVST1_VNUM    : MInst<"svst1_vnum[_{d}]",  "vPpld", "csilUcUsUiUlhfd", [IsStore]>;
+def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil",             [IsStore], MemEltTyInt8>;
+def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl",          [IsStore], MemEltTyInt8>;
+def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il",              [IsStore], MemEltTyInt16>;
+def SVST1H_VNUM_U : MInst<"svst1h_vnum[_{d}]", "vPFld", "UiUl",            [IsStore], MemEltTyInt16>;
+def SVST1W_VNUM_S : MInst<"svst1w_vnum[_{d}]", "vPCld", "l",               [IsStore], MemEltTyInt32>;
+def SVST1W_VNUM_U : MInst<"svst1w_vnum[_{d}]", "vPGld", "Ul",              [IsStore], MemEltTyInt32>;
+
+// Store one vector, with no truncation, non-temporal (scalar base)
+def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">;
+
+// Store one vector, with no truncation, non-temporal (scalar base, VL displacement)
+def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">;

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 812fdc61c25a..a7a29c13697e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7496,23 +7496,68 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
   return C;
 }
 
-Value *CodeGenFunction::EmitSVEMaskedLoad(llvm::Type *ReturnTy,
-                                          SmallVectorImpl<Value *> &Ops) {
-  llvm::PointerType *PTy = cast<llvm::PointerType>(Ops[1]->getType());
-  llvm::Type *MemEltTy = PTy->getPointerElementType();
+Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
+                                          llvm::Type *ReturnTy,
+                                          SmallVectorImpl<Value *> &Ops,
+                                          unsigned BuiltinID,
+                                          bool IsZExtReturn) {
+  QualType LangPTy = E->getArg(1)->getType();
+  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
+      LangPTy->getAs<PointerType>()->getPointeeType());
 
   // The vector type that is returned may be 
diff erent from the
   // eventual type loaded from memory.
   auto VectorTy = cast<llvm::VectorType>(ReturnTy);
   auto MemoryTy = llvm::VectorType::get(MemEltTy, VectorTy->getElementCount());
 
-  Value *Offset = Builder.getInt32(0);
   Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
   Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
+  Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
   BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
 
   Value *Splat0 = Constant::getNullValue(MemoryTy);
-  return Builder.CreateMaskedLoad(BasePtr, Align(1), Predicate, Splat0);
+
+  Value *Load = nullptr;
+  if (!BuiltinID)
+    // Regular masked loads take a 
diff erent path from the SVE-specific ones.
+    Load = Builder.CreateMaskedLoad(BasePtr, llvm::Align(1), Predicate, Splat0);
+  else {
+    BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
+    Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
+    Load = Builder.CreateCall(F, {Predicate, BasePtr});
+  }
+
+  return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
+                     : Builder.CreateSExt(Load, VectorTy);
+}
+
+Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
+                                           SmallVectorImpl<Value *> &Ops,
+                                           unsigned BuiltinID) {
+  QualType LangPTy = E->getArg(1)->getType();
+  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
+      LangPTy->getAs<PointerType>()->getPointeeType());
+
+  // The vector type that is stored may be 
diff erent from the
+  // eventual type stored to memory.
+  auto VectorTy = cast<llvm::VectorType>(Ops.back()->getType());
+  auto MemoryTy =
+      llvm::VectorType::get(MemEltTy, VectorTy->getVectorElementCount());
+
+  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
+  Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
+  Value *Offset = Ops.size() == 4 ? Ops[2] : Builder.getInt32(0);
+  BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
+
+  // Last value is always the data
+  llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);
+
+  if (!BuiltinID)
+    return Builder.CreateMaskedStore(Val, BasePtr, llvm::Align(1), Predicate);
+
+  BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
+  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
+  return Builder.CreateCall(F, {Val, Predicate, BasePtr});
 }
 
 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
@@ -7536,7 +7581,10 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
   SVETypeFlags TypeFlags(Builtin->TypeModifier);
   llvm::Type *Ty = ConvertType(E->getType());
   if (TypeFlags.isLoad())
-    return EmitSVEMaskedLoad(Ty, Ops);
+    return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
+                             TypeFlags.isZExtReturn());
+  else if (TypeFlags.isStore())
+    return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
 
   /// Should not happen
   return nullptr;

diff  --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 0128a07030c5..32ca12700536 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3904,8 +3904,12 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *vectorWrapScalar16(llvm::Value *Op);
 
   llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, llvm::VectorType *VTy);
-  llvm::Value *EmitSVEMaskedLoad(llvm::Type *ReturnTy,
-                                 SmallVectorImpl<llvm::Value *> &Ops);
+  llvm::Value *EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy,
+                                 SmallVectorImpl<llvm::Value *> &Ops,
+                                 unsigned BuiltinID, bool IsZExtReturn);
+  llvm::Value *EmitSVEMaskedStore(const CallExpr *,
+                                  SmallVectorImpl<llvm::Value *> &Ops,
+                                  unsigned BuiltinID);
   llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
index 82f66432fe33..5ff56627048b 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
@@ -1,83 +1,237 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s -D__ARM_FEATURE_SVE | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
 
 #include <arm_sve.h>
-//
-// ld1
-//
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
 
 svint8_t test_svld1_s8(svbool_t pg, const int8_t *base)
 {
   // CHECK-LABEL: test_svld1_s8
-  // CHECK: <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %{{.*}}, i32 1, <vscale x 16 x i1> %{{.*}}, <vscale x 16 x i8> zeroinitializer)
-  return svld1_s8(pg, base);
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK-DAG: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %[[BASE]], i32 1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1,_s8,,)(pg, base);
 }
 
 svint16_t test_svld1_s16(svbool_t pg, const int16_t *base)
 {
   // CHECK-LABEL: test_svld1_s16
-  // CHECK: <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i16> zeroinitializer)
-  return svld1_s16(pg, base);
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i16> zeroinitializer)
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1,_s16,,)(pg, base);
 }
 
 svint32_t test_svld1_s32(svbool_t pg, const int32_t *base)
 {
   // CHECK-LABEL: test_svld1_s32
-  // CHECK: <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> zeroinitializer)
-  return svld1_s32(pg, base);
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> zeroinitializer)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1,_s32,,)(pg, base);
 }
 
 svint64_t test_svld1_s64(svbool_t pg, const int64_t *base)
 {
   // CHECK-LABEL: test_svld1_s64
-  // CHECK: <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i64> zeroinitializer)
-  return svld1_s64(pg, base);
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> zeroinitializer)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1,_s64,,)(pg, base);
 }
 
 svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base)
 {
   // CHECK-LABEL: test_svld1_u8
-  // CHECK: <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %{{.*}}, i32 1, <vscale x 16 x i1> %{{.*}}, <vscale x 16 x i8> zeroinitializer)
-  return svld1_u8(pg, base);
+  // CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %[[BASE]], i32 1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1,_u8,,)(pg, base);
 }
 
 svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base)
 {
   // CHECK-LABEL: test_svld1_u16
-  // CHECK: <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i16> zeroinitializer)
-  return svld1_u16(pg, base);
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i16> zeroinitializer)
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1,_u16,,)(pg, base);
 }
 
 svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base)
 {
   // CHECK-LABEL: test_svld1_u32
-  // CHECK: <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> zeroinitializer)
-  return svld1_u32(pg, base);
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> zeroinitializer)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1,_u32,,)(pg, base);
 }
 
 svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base)
 {
   // CHECK-LABEL: test_svld1_u64
-  // CHECK: <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i64> zeroinitializer)
-  return svld1_u64(pg, base);
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> zeroinitializer)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1,_u64,,)(pg, base);
 }
 
 svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base)
 {
   // CHECK-LABEL: test_svld1_f16
-  // CHECK: <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x half> zeroinitializer)
-  return svld1_f16(pg, base);
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast half* %base to <vscale x 8 x half>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x half> zeroinitializer)
+  // CHECK: ret <vscale x 8 x half> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1,_f16,,)(pg, base);
 }
 
 svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base)
 {
   // CHECK-LABEL: test_svld1_f32
-  // CHECK: <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x float> zeroinitializer)
-  return svld1_f32(pg, base);
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast float* %base to <vscale x 4 x float>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x float> zeroinitializer)
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1,_f32,,)(pg, base);
 }
 
 svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base)
 {
   // CHECK-LABEL: test_svld1_f64
-  // CHECK: <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x double> zeroinitializer)
-  return svld1_f64(pg, base);
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast double* %base to <vscale x 2 x double>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x double> zeroinitializer)
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1,_f64,,)(pg, base);
+}
+svint8_t test_svld1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1_vnum_s8
+  // CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %[[GEP]], i32 1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_vnum,_s8,,)(pg, base, vnum);
+}
+
+svint16_t test_svld1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1_vnum_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i16> zeroinitializer)
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_vnum,_s16,,)(pg, base, vnum);
+}
+
+svint32_t test_svld1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> zeroinitializer)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_vnum,_s32,,)(pg, base, vnum);
+}
+
+svint64_t test_svld1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> zeroinitializer)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_vnum,_s64,,)(pg, base, vnum);
+}
+
+svuint8_t test_svld1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1_vnum_u8
+  // CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %[[GEP]], i32 1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_vnum,_u8,,)(pg, base, vnum);
+}
+
+svuint16_t test_svld1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1_vnum_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i16> zeroinitializer)
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_vnum,_u16,,)(pg, base, vnum);
+}
+
+svuint32_t test_svld1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> zeroinitializer)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_vnum,_u32,,)(pg, base, vnum);
+}
+
+svuint64_t test_svld1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> zeroinitializer)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_vnum,_u64,,)(pg, base, vnum);
+}
+
+svfloat16_t test_svld1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1_vnum_f16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast half* %base to <vscale x 8 x half>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x half> zeroinitializer)
+  // CHECK: ret <vscale x 8 x half> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_vnum,_f16,,)(pg, base, vnum);
+}
+
+svfloat32_t test_svld1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1_vnum_f32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast float* %base to <vscale x 4 x float>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x float> zeroinitializer)
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_vnum,_f32,,)(pg, base, vnum);
+}
+
+svfloat64_t test_svld1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1_vnum_f64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast double* %base to <vscale x 2 x double>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x double> zeroinitializer)
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_vnum,_f64,,)(pg, base, vnum);
 }

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1_shortform.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1_shortform.c
deleted file mode 100644
index 90258f00de43..000000000000
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1_shortform.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s -D__ARM_FEATURE_SVE | FileCheck %s
-
-#include <arm_sve.h>
-//
-// ld1
-//
-
-svint8_t test_svld1_s8(svbool_t pg, const int8_t *base)
-{
-  // CHECK-LABEL: test_svld1_s8
-  // CHECK: <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %{{.*}}, i32 1, <vscale x 16 x i1> %{{.*}}, <vscale x 16 x i8> zeroinitializer)
-  return svld1(pg, base);
-}
-
-svint16_t test_svld1_s16(svbool_t pg, const int16_t *base)
-{
-  // CHECK-LABEL: test_svld1_s16
-  // CHECK: <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i16> zeroinitializer)
-  return svld1(pg, base);
-}
-
-svint32_t test_svld1_s32(svbool_t pg, const int32_t *base)
-{
-  // CHECK-LABEL: test_svld1_s32
-  // CHECK: <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> zeroinitializer)
-  return svld1(pg, base);
-}
-
-svint64_t test_svld1_s64(svbool_t pg, const int64_t *base)
-{
-  // CHECK-LABEL: test_svld1_s64
-  // CHECK: <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i64> zeroinitializer)
-  return svld1(pg, base);
-}
-
-svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base)
-{
-  // CHECK-LABEL: test_svld1_u8
-  // CHECK: <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* %{{.*}}, i32 1, <vscale x 16 x i1> %{{.*}}, <vscale x 16 x i8> zeroinitializer)
-  return svld1(pg, base);
-}
-
-svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base)
-{
-  // CHECK-LABEL: test_svld1_u16
-  // CHECK: <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x i16> zeroinitializer)
-  return svld1(pg, base);
-}
-
-svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base)
-{
-  // CHECK-LABEL: test_svld1_u32
-  // CHECK: <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> zeroinitializer)
-  return svld1(pg, base);
-}
-
-svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base)
-{
-  // CHECK-LABEL: test_svld1_u64
-  // CHECK: <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x i64> zeroinitializer)
-  return svld1(pg, base);
-}
-
-svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base)
-{
-  // CHECK-LABEL: test_svld1_f16
-  // CHECK: <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* %{{.*}}, i32 1, <vscale x 8 x i1> %{{.*}}, <vscale x 8 x half> zeroinitializer)
-  return svld1(pg, base);
-}
-
-svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base)
-{
-  // CHECK-LABEL: test_svld1_f32
-  // CHECK: <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %{{.*}}, i32 1, <vscale x 4 x i1> %{{.*}}, <vscale x 4 x float> zeroinitializer)
-  return svld1(pg, base);
-}
-
-svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base)
-{
-  // CHECK-LABEL: test_svld1_f64
-  // CHECK: <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* %{{.*}}, i32 1, <vscale x 2 x i1> %{{.*}}, <vscale x 2 x double> zeroinitializer)
-  return svld1(pg, base);
-}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
new file mode 100644
index 000000000000..e9c3a8c9dd6c
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
@@ -0,0 +1,149 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint16_t test_svld1sb_s16(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svld1sb_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i8> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[SEXT]]
+  return svld1sb_s16(pg, base);
+}
+
+svint32_t test_svld1sb_s32(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svld1sb_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i8> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svld1sb_s32(pg, base);
+}
+
+svint64_t test_svld1sb_s64(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svld1sb_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i8> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svld1sb_s64(pg, base);
+}
+
+svuint16_t test_svld1sb_u16(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svld1sb_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i8> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[SEXT]]
+  return svld1sb_u16(pg, base);
+}
+
+svuint32_t test_svld1sb_u32(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svld1sb_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i8> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svld1sb_u32(pg, base);
+}
+
+svuint64_t test_svld1sb_u64(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svld1sb_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i8> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svld1sb_u64(pg, base);
+}
+
+svint16_t test_svld1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1sb_vnum_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i8> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[SEXT]]
+  return svld1sb_vnum_s16(pg, base, vnum);
+}
+
+svint32_t test_svld1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1sb_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i8> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svld1sb_vnum_s32(pg, base, vnum);
+}
+
+svint64_t test_svld1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1sb_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i8> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svld1sb_vnum_s64(pg, base, vnum);
+}
+
+svuint16_t test_svld1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1sb_vnum_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i8> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[SEXT]]
+  return svld1sb_vnum_u16(pg, base, vnum);
+}
+
+svuint32_t test_svld1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1sb_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i8> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svld1sb_vnum_u32(pg, base, vnum);
+}
+
+svuint64_t test_svld1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1sb_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i8> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svld1sb_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
new file mode 100644
index 000000000000..a62f86cd91ff
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
@@ -0,0 +1,103 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint32_t test_svld1sh_s32(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svld1sh_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i16> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svld1sh_s32(pg, base);
+}
+
+svint64_t test_svld1sh_s64(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svld1sh_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i16> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svld1sh_s64(pg, base);
+}
+
+svuint32_t test_svld1sh_u32(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svld1sh_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i16> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svld1sh_u32(pg, base);
+}
+
+svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svld1sh_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i16> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svld1sh_u64(pg, base);
+}
+
+svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1sh_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i16> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svld1sh_vnum_s32(pg, base, vnum);
+}
+
+svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1sh_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i16> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svld1sh_vnum_s64(pg, base, vnum);
+}
+
+svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1sh_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i16> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svld1sh_vnum_u32(pg, base, vnum);
+}
+
+svuint64_t test_svld1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1sh_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i16> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svld1sh_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
new file mode 100644
index 000000000000..79af6fd339aa
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint64_t test_svld1sw_s64(svbool_t pg, const int32_t *base)
+{
+  // CHECK-LABEL: test_svld1sw_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i32> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svld1sw_s64(pg, base);
+}
+
+svuint64_t test_svld1sw_u64(svbool_t pg, const int32_t *base)
+{
+  // CHECK-LABEL: test_svld1sw_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i32> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svld1sw_u64(pg, base);
+}
+
+svint64_t test_svld1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1sw_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i32> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svld1sw_vnum_s64(pg, base, vnum);
+}
+
+svuint64_t test_svld1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1sw_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i32> zeroinitializer)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svld1sw_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
new file mode 100644
index 000000000000..fa430cc7bf50
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
@@ -0,0 +1,149 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint16_t test_svld1ub_s16(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svld1ub_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i8> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[ZEXT]]
+  return svld1ub_s16(pg, base);
+}
+
+svint32_t test_svld1ub_s32(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svld1ub_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i8> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svld1ub_s32(pg, base);
+}
+
+svint64_t test_svld1ub_s64(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svld1ub_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i8> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svld1ub_s64(pg, base);
+}
+
+svuint16_t test_svld1ub_u16(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svld1ub_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i8> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[ZEXT]]
+  return svld1ub_u16(pg, base);
+}
+
+svuint32_t test_svld1ub_u32(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svld1ub_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i8> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svld1ub_u32(pg, base);
+}
+
+svuint64_t test_svld1ub_u64(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svld1ub_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i8> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svld1ub_u64(pg, base);
+}
+
+svint16_t test_svld1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1ub_vnum_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i8> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[ZEXT]]
+  return svld1ub_vnum_s16(pg, base, vnum);
+}
+
+svint32_t test_svld1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1ub_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i8> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svld1ub_vnum_s32(pg, base, vnum);
+}
+
+svint64_t test_svld1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1ub_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i8> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svld1ub_vnum_s64(pg, base, vnum);
+}
+
+svuint16_t test_svld1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1ub_vnum_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i8> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[ZEXT]]
+  return svld1ub_vnum_u16(pg, base, vnum);
+}
+
+svuint32_t test_svld1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1ub_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i8> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svld1ub_vnum_u32(pg, base, vnum);
+}
+
+svuint64_t test_svld1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1ub_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i8> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svld1ub_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
new file mode 100644
index 000000000000..32de2decd8db
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
@@ -0,0 +1,103 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint32_t test_svld1uh_s32(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svld1uh_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i16> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svld1uh_s32(pg, base);
+}
+
+svint64_t test_svld1uh_s64(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svld1uh_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i16> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svld1uh_s64(pg, base);
+}
+
+svuint32_t test_svld1uh_u32(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svld1uh_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i16> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svld1uh_u32(pg, base);
+}
+
+svuint64_t test_svld1uh_u64(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svld1uh_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i16> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svld1uh_u64(pg, base);
+}
+
+svint32_t test_svld1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1uh_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i16> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svld1uh_vnum_s32(pg, base, vnum);
+}
+
+svint64_t test_svld1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1uh_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i16> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svld1uh_vnum_s64(pg, base, vnum);
+}
+
+svuint32_t test_svld1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1uh_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i16> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svld1uh_vnum_u32(pg, base, vnum);
+}
+
+svuint64_t test_svld1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1uh_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i16> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svld1uh_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
new file mode 100644
index 000000000000..1c6275712611
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint64_t test_svld1uw_s64(svbool_t pg, const uint32_t *base)
+{
+  // CHECK-LABEL: test_svld1uw_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i32> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svld1uw_s64(pg, base);
+}
+
+svuint64_t test_svld1uw_u64(svbool_t pg, const uint32_t *base)
+{
+  // CHECK-LABEL: test_svld1uw_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i32> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svld1uw_u64(pg, base);
+}
+
+svint64_t test_svld1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1uw_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i32> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svld1uw_vnum_s64(pg, base, vnum);
+}
+
+svuint64_t test_svld1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svld1uw_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BASE]], i64 %vnum
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i32> zeroinitializer)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svld1uw_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c
new file mode 100644
index 000000000000..9b2b3f3117a0
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c
@@ -0,0 +1,227 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint8_t test_svldff1_s8(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svldff1_s8
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1,_s8,,)(pg, base);
+}
+
+svint16_t test_svldff1_s16(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svldff1_s16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %base)
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1,_s16,,)(pg, base);
+}
+
+svint32_t test_svldff1_s32(svbool_t pg, const int32_t *base)
+{
+  // CHECK-LABEL: test_svldff1_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1,_s32,,)(pg, base);
+}
+
+svint64_t test_svldff1_s64(svbool_t pg, const int64_t *base)
+{
+  // CHECK-LABEL: test_svldff1_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1,_s64,,)(pg, base);
+}
+
+svuint8_t test_svldff1_u8(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldff1_u8
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1,_u8,,)(pg, base);
+}
+
+svuint16_t test_svldff1_u16(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svldff1_u16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %base)
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1,_u16,,)(pg, base);
+}
+
+svuint32_t test_svldff1_u32(svbool_t pg, const uint32_t *base)
+{
+  // CHECK-LABEL: test_svldff1_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1,_u32,,)(pg, base);
+}
+
+svuint64_t test_svldff1_u64(svbool_t pg, const uint64_t *base)
+{
+  // CHECK-LABEL: test_svldff1_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1,_u64,,)(pg, base);
+}
+
+svfloat16_t test_svldff1_f16(svbool_t pg, const float16_t *base)
+{
+  // CHECK-LABEL: test_svldff1_f16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> %[[PG]], half* %base)
+  // CHECK: ret <vscale x 8 x half> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1,_f16,,)(pg, base);
+}
+
+svfloat32_t test_svldff1_f32(svbool_t pg, const float32_t *base)
+{
+  // CHECK-LABEL: test_svldff1_f32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %base)
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1,_f32,,)(pg, base);
+}
+
+svfloat64_t test_svldff1_f64(svbool_t pg, const float64_t *base)
+{
+  // CHECK-LABEL: test_svldff1_f64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %base)
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1,_f64,,)(pg, base);
+}
+
+svint8_t test_svldff1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1_vnum_s8
+  // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, i8* %[[GEP]])
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_vnum,_s8,,)(pg, base, vnum);
+}
+
+svint16_t test_svldff1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1_vnum_s16
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_vnum,_s16,,)(pg, base, vnum);
+}
+
+svint32_t test_svldff1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1_vnum_s32
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_vnum,_s32,,)(pg, base, vnum);
+}
+
+svint64_t test_svldff1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1_vnum_s64
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_vnum,_s64,,)(pg, base, vnum);
+}
+
+svuint8_t test_svldff1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1_vnum_u8
+  // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, i8* %[[GEP]])
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_vnum,_u8,,)(pg, base, vnum);
+}
+
+svuint16_t test_svldff1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1_vnum_u16
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_vnum,_u16,,)(pg, base, vnum);
+}
+
+svuint32_t test_svldff1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1_vnum_u32
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_vnum,_u32,,)(pg, base, vnum);
+}
+
+svuint64_t test_svldff1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1_vnum_u64
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_vnum,_u64,,)(pg, base, vnum);
+}
+
+svfloat16_t test_svldff1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1_vnum_f16
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to <vscale x 8 x half>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> %[[PG]], half* %[[GEP]])
+  // CHECK: ret <vscale x 8 x half> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_vnum,_f16,,)(pg, base, vnum);
+}
+
+svfloat32_t test_svldff1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1_vnum_f32
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to <vscale x 4 x float>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %[[GEP]])
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_vnum,_f32,,)(pg, base, vnum);
+}
+
+svfloat64_t test_svldff1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1_vnum_f64
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to <vscale x 2 x double>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %[[GEP]])
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_vnum,_f64,,)(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c
new file mode 100644
index 000000000000..c32ba7a218c1
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c
@@ -0,0 +1,143 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint16_t test_svldff1sb_s16(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svldff1sb_s16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[SEXT]]
+  return svldff1sb_s16(pg, base);
+}
+
+svint32_t test_svldff1sb_s32(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svldff1sb_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svldff1sb_s32(pg, base);
+}
+
+svint64_t test_svldff1sb_s64(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svldff1sb_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldff1sb_s64(pg, base);
+}
+
+svuint16_t test_svldff1sb_u16(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svldff1sb_u16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[SEXT]]
+  return svldff1sb_u16(pg, base);
+}
+
+svuint32_t test_svldff1sb_u32(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svldff1sb_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svldff1sb_u32(pg, base);
+}
+
+svuint64_t test_svldff1sb_u64(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svldff1sb_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldff1sb_u64(pg, base);
+}
+
+svint16_t test_svldff1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1sb_vnum_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[SEXT]]
+  return svldff1sb_vnum_s16(pg, base, vnum);
+}
+
+svint32_t test_svldff1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1sb_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svldff1sb_vnum_s32(pg, base, vnum);
+}
+
+svint64_t test_svldff1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1sb_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldff1sb_vnum_s64(pg, base, vnum);
+}
+
+svuint16_t test_svldff1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1sb_vnum_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[SEXT]]
+  return svldff1sb_vnum_u16(pg, base, vnum);
+}
+
+svuint32_t test_svldff1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1sb_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svldff1sb_vnum_u32(pg, base, vnum);
+}
+
+svuint64_t test_svldff1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1sb_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldff1sb_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c
new file mode 100644
index 000000000000..4921a408eaf3
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+svint32_t test_svldff1sh_s32(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svldff1sh_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svldff1sh_s32(pg, base);
+}
+
+svint64_t test_svldff1sh_s64(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svldff1sh_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldff1sh_s64(pg, base);
+}
+
+svuint32_t test_svldff1sh_u32(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svldff1sh_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svldff1sh_u32(pg, base);
+}
+
+svuint64_t test_svldff1sh_u64(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svldff1sh_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldff1sh_u64(pg, base);
+}
+
+svint32_t test_svldff1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1sh_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svldff1sh_vnum_s32(pg, base, vnum);
+}
+
+svint64_t test_svldff1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1sh_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldff1sh_vnum_s64(pg, base, vnum);
+}
+
+svuint32_t test_svldff1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1sh_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svldff1sh_vnum_u32(pg, base, vnum);
+}
+
+svuint64_t test_svldff1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1sh_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldff1sh_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c
new file mode 100644
index 000000000000..7b65d32d1315
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+svint64_t test_svldff1sw_s64(svbool_t pg, const int32_t *base)
+{
+  // CHECK-LABEL: test_svldff1sw_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldff1sw_s64(pg, base);
+}
+
+svuint64_t test_svldff1sw_u64(svbool_t pg, const int32_t *base)
+{
+  // CHECK-LABEL: test_svldff1sw_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldff1sw_u64(pg, base);
+}
+
+svint64_t test_svldff1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1sw_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldff1sw_vnum_s64(pg, base, vnum);
+}
+
+svuint64_t test_svldff1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1sw_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldff1sw_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c
new file mode 100644
index 000000000000..c098adde24e9
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c
@@ -0,0 +1,135 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+svint16_t test_svldff1ub_s16(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldff1ub_s16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[ZEXT]]
+  return svldff1ub_s16(pg, base);
+}
+
+svint32_t test_svldff1ub_s32(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldff1ub_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldff1ub_s32(pg, base);
+}
+
+svint64_t test_svldff1ub_s64(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldff1ub_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldff1ub_s64(pg, base);
+}
+
+svuint16_t test_svldff1ub_u16(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldff1ub_u16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[ZEXT]]
+  return svldff1ub_u16(pg, base);
+}
+
+svuint32_t test_svldff1ub_u32(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldff1ub_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldff1ub_u32(pg, base);
+}
+
+svuint64_t test_svldff1ub_u64(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldff1ub_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldff1ub_u64(pg, base);
+}
+
+svint16_t test_svldff1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1ub_vnum_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[ZEXT]]
+  return svldff1ub_vnum_s16(pg, base, vnum);
+}
+
+svint32_t test_svldff1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1ub_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldff1ub_vnum_s32(pg, base, vnum);
+}
+
+svint64_t test_svldff1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1ub_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldff1ub_vnum_s64(pg, base, vnum);
+}
+
+svuint16_t test_svldff1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1ub_vnum_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[ZEXT]]
+  return svldff1ub_vnum_u16(pg, base, vnum);
+}
+
+svuint32_t test_svldff1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1ub_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldff1ub_vnum_u32(pg, base, vnum);
+}
+
+svuint64_t test_svldff1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1ub_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldff1ub_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c
new file mode 100644
index 000000000000..fd5211e6c831
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+svint32_t test_svldff1uh_s32(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svldff1uh_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldff1uh_s32(pg, base);
+}
+
+svint64_t test_svldff1uh_s64(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svldff1uh_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldff1uh_s64(pg, base);
+}
+
+svuint32_t test_svldff1uh_u32(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svldff1uh_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldff1uh_u32(pg, base);
+}
+
+svuint64_t test_svldff1uh_u64(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svldff1uh_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldff1uh_u64(pg, base);
+}
+
+svint32_t test_svldff1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1uh_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldff1uh_vnum_s32(pg, base, vnum);
+}
+
+svint64_t test_svldff1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1uh_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldff1uh_vnum_s64(pg, base, vnum);
+}
+
+svuint32_t test_svldff1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1uh_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldff1uh_vnum_u32(pg, base, vnum);
+}
+
+svuint64_t test_svldff1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1uh_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldff1uh_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c
new file mode 100644
index 000000000000..bdcfca49afa9
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+svint64_t test_svldff1uw_s64(svbool_t pg, const uint32_t *base)
+{
+  // CHECK-LABEL: test_svldff1uw_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldff1uw_s64(pg, base);
+}
+
+svuint64_t test_svldff1uw_u64(svbool_t pg, const uint32_t *base)
+{
+  // CHECK-LABEL: test_svldff1uw_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldff1uw_u64(pg, base);
+}
+
+svint64_t test_svldff1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1uw_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldff1uw_vnum_s64(pg, base, vnum);
+}
+
+svuint64_t test_svldff1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldff1uw_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldff1uw_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c
new file mode 100644
index 000000000000..490fd7ed0d9a
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c
@@ -0,0 +1,227 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint8_t test_svldnf1_s8(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svldnf1_s8
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1,_s8,,)(pg, base);
+}
+
+svint16_t test_svldnf1_s16(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svldnf1_s16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %base)
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1,_s16,,)(pg, base);
+}
+
+svint32_t test_svldnf1_s32(svbool_t pg, const int32_t *base)
+{
+  // CHECK-LABEL: test_svldnf1_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1,_s32,,)(pg, base);
+}
+
+svint64_t test_svldnf1_s64(svbool_t pg, const int64_t *base)
+{
+  // CHECK-LABEL: test_svldnf1_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1,_s64,,)(pg, base);
+}
+
+svuint8_t test_svldnf1_u8(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldnf1_u8
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1,_u8,,)(pg, base);
+}
+
+svuint16_t test_svldnf1_u16(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svldnf1_u16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %base)
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1,_u16,,)(pg, base);
+}
+
+svuint32_t test_svldnf1_u32(svbool_t pg, const uint32_t *base)
+{
+  // CHECK-LABEL: test_svldnf1_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1,_u32,,)(pg, base);
+}
+
+svuint64_t test_svldnf1_u64(svbool_t pg, const uint64_t *base)
+{
+  // CHECK-LABEL: test_svldnf1_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1,_u64,,)(pg, base);
+}
+
+svfloat16_t test_svldnf1_f16(svbool_t pg, const float16_t *base)
+{
+  // CHECK-LABEL: test_svldnf1_f16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %[[PG]], half* %base)
+  // CHECK: ret <vscale x 8 x half> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1,_f16,,)(pg, base);
+}
+
+svfloat32_t test_svldnf1_f32(svbool_t pg, const float32_t *base)
+{
+  // CHECK-LABEL: test_svldnf1_f32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %base)
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1,_f32,,)(pg, base);
+}
+
+svfloat64_t test_svldnf1_f64(svbool_t pg, const float64_t *base)
+{
+  // CHECK-LABEL: test_svldnf1_f64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %base)
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1,_f64,,)(pg, base);
+}
+
+svint8_t test_svldnf1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1_vnum_s8
+  // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %[[GEP]])
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1_vnum,_s8,,)(pg, base, vnum);
+}
+
+svint16_t test_svldnf1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1_vnum_s16
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1_vnum,_s16,,)(pg, base, vnum);
+}
+
+svint32_t test_svldnf1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1_vnum_s32
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1_vnum,_s32,,)(pg, base, vnum);
+}
+
+svint64_t test_svldnf1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1_vnum_s64
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1_vnum,_s64,,)(pg, base, vnum);
+}
+
+svuint8_t test_svldnf1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1_vnum_u8
+  // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %[[GEP]])
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1_vnum,_u8,,)(pg, base, vnum);
+}
+
+svuint16_t test_svldnf1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1_vnum_u16
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1_vnum,_u16,,)(pg, base, vnum);
+}
+
+svuint32_t test_svldnf1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1_vnum_u32
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1_vnum,_u32,,)(pg, base, vnum);
+}
+
+svuint64_t test_svldnf1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1_vnum_u64
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1_vnum,_u64,,)(pg, base, vnum);
+}
+
+svfloat16_t test_svldnf1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1_vnum_f16
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to <vscale x 8 x half>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %[[PG]], half* %[[GEP]])
+  // CHECK: ret <vscale x 8 x half> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1_vnum,_f16,,)(pg, base, vnum);
+}
+
+svfloat32_t test_svldnf1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1_vnum_f32
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to <vscale x 4 x float>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %[[GEP]])
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1_vnum,_f32,,)(pg, base, vnum);
+}
+
+svfloat64_t test_svldnf1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1_vnum_f64
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to <vscale x 2 x double>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %[[GEP]])
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnf1_vnum,_f64,,)(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sh.c
new file mode 100644
index 000000000000..d57fa1fd1110
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sh.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+svint32_t test_svldnf1sh_s32(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svldnf1sh_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svldnf1sh_s32(pg, base);
+}
+
+svint64_t test_svldnf1sh_s64(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svldnf1sh_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldnf1sh_s64(pg, base);
+}
+
+svuint32_t test_svldnf1sh_u32(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svldnf1sh_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svldnf1sh_u32(pg, base);
+}
+
+svuint64_t test_svldnf1sh_u64(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svldnf1sh_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldnf1sh_u64(pg, base);
+}
+
+svint32_t test_svldnf1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1sh_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svldnf1sh_vnum_s32(pg, base, vnum);
+}
+
+svint64_t test_svldnf1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1sh_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldnf1sh_vnum_s64(pg, base, vnum);
+}
+
+svuint32_t test_svldnf1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1sh_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return svldnf1sh_vnum_u32(pg, base, vnum);
+}
+
+svuint64_t test_svldnf1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1sh_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldnf1sh_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sw.c
new file mode 100644
index 000000000000..5c70e3068c62
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sw.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+svint64_t test_svldnf1sw_s64(svbool_t pg, const int32_t *base)
+{
+  // CHECK-LABEL: test_svldnf1sw_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldnf1sw_s64(pg, base);
+}
+
+svuint64_t test_svldnf1sw_u64(svbool_t pg, const int32_t *base)
+{
+  // CHECK-LABEL: test_svldnf1sw_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldnf1sw_u64(pg, base);
+}
+
+svint64_t test_svldnf1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1sw_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldnf1sw_vnum_s64(pg, base, vnum);
+}
+
+svuint64_t test_svldnf1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1sw_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return svldnf1sw_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1ub.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1ub.c
new file mode 100644
index 000000000000..19b316d8774d
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1ub.c
@@ -0,0 +1,135 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+svint16_t test_svldnf1ub_s16(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldnf1ub_s16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[ZEXT]]
+  return svldnf1ub_s16(pg, base);
+}
+
+svint32_t test_svldnf1ub_s32(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldnf1ub_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldnf1ub_s32(pg, base);
+}
+
+svint64_t test_svldnf1ub_s64(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldnf1ub_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldnf1ub_s64(pg, base);
+}
+
+svuint16_t test_svldnf1ub_u16(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldnf1ub_u16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[ZEXT]]
+  return svldnf1ub_u16(pg, base);
+}
+
+svuint32_t test_svldnf1ub_u32(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldnf1ub_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldnf1ub_u32(pg, base);
+}
+
+svuint64_t test_svldnf1ub_u64(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldnf1ub_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldnf1ub_u64(pg, base);
+}
+
+svint16_t test_svldnf1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1ub_vnum_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[ZEXT]]
+  return svldnf1ub_vnum_s16(pg, base, vnum);
+}
+
+svint32_t test_svldnf1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1ub_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldnf1ub_vnum_s32(pg, base, vnum);
+}
+
+svint64_t test_svldnf1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1ub_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldnf1ub_vnum_s64(pg, base, vnum);
+}
+
+svuint16_t test_svldnf1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1ub_vnum_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 8 x i8> %[[LOAD]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[ZEXT]]
+  return svldnf1ub_vnum_u16(pg, base, vnum);
+}
+
+svuint32_t test_svldnf1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1ub_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldnf1ub_vnum_u32(pg, base, vnum);
+}
+
+svuint64_t test_svldnf1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1ub_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldnf1ub_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uh.c
new file mode 100644
index 000000000000..260bcb2e81a7
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uh.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+svint32_t test_svldnf1uh_s32(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svldnf1uh_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldnf1uh_s32(pg, base);
+}
+
+svint64_t test_svldnf1uh_s64(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svldnf1uh_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldnf1uh_s64(pg, base);
+}
+
+svuint32_t test_svldnf1uh_u32(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svldnf1uh_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldnf1uh_u32(pg, base);
+}
+
+svuint64_t test_svldnf1uh_u64(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svldnf1uh_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldnf1uh_u64(pg, base);
+}
+
+svint32_t test_svldnf1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1uh_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldnf1uh_vnum_s32(pg, base, vnum);
+}
+
+svint64_t test_svldnf1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1uh_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldnf1uh_vnum_s64(pg, base, vnum);
+}
+
+svuint32_t test_svldnf1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1uh_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return svldnf1uh_vnum_u32(pg, base, vnum);
+}
+
+svuint64_t test_svldnf1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1uh_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldnf1uh_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uw.c
new file mode 100644
index 000000000000..011440da933f
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uw.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+svint64_t test_svldnf1uw_s64(svbool_t pg, const uint32_t *base)
+{
+  // CHECK-LABEL: test_svldnf1uw_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldnf1uw_s64(pg, base);
+}
+
+svuint64_t test_svldnf1uw_u64(svbool_t pg, const uint32_t *base)
+{
+  // CHECK-LABEL: test_svldnf1uw_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldnf1uw_u64(pg, base);
+}
+
+svint64_t test_svldnf1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1uw_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldnf1uw_vnum_s64(pg, base, vnum);
+}
+
+svuint64_t test_svldnf1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnf1uw_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return svldnf1uw_vnum_u64(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c
new file mode 100644
index 000000000000..c685722d1876
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c
@@ -0,0 +1,227 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint8_t test_svldnt1_s8(svbool_t pg, const int8_t *base)
+{
+  // CHECK-LABEL: test_svldnt1_s8
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1,_s8,,)(pg, base);
+}
+
+svint16_t test_svldnt1_s16(svbool_t pg, const int16_t *base)
+{
+  // CHECK-LABEL: test_svldnt1_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %base)
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1,_s16,,)(pg, base);
+}
+
+svint32_t test_svldnt1_s32(svbool_t pg, const int32_t *base)
+{
+  // CHECK-LABEL: test_svldnt1_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1,_s32,,)(pg, base);
+}
+
+svint64_t test_svldnt1_s64(svbool_t pg, const int64_t *base)
+{
+  // CHECK-LABEL: test_svldnt1_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1,_s64,,)(pg, base);
+}
+
+svuint8_t test_svldnt1_u8(svbool_t pg, const uint8_t *base)
+{
+  // CHECK-LABEL: test_svldnt1_u8
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1,_u8,,)(pg, base);
+}
+
+svuint16_t test_svldnt1_u16(svbool_t pg, const uint16_t *base)
+{
+  // CHECK-LABEL: test_svldnt1_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %base)
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1,_u16,,)(pg, base);
+}
+
+svuint32_t test_svldnt1_u32(svbool_t pg, const uint32_t *base)
+{
+  // CHECK-LABEL: test_svldnt1_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1,_u32,,)(pg, base);
+}
+
+svuint64_t test_svldnt1_u64(svbool_t pg, const uint64_t *base)
+{
+  // CHECK-LABEL: test_svldnt1_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1,_u64,,)(pg, base);
+}
+
+svfloat16_t test_svldnt1_f16(svbool_t pg, const float16_t *base)
+{
+  // CHECK-LABEL: test_svldnt1_f16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %[[PG]], half* %base)
+  // CHECK: ret <vscale x 8 x half> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1,_f16,,)(pg, base);
+}
+
+svfloat32_t test_svldnt1_f32(svbool_t pg, const float32_t *base)
+{
+  // CHECK-LABEL: test_svldnt1_f32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %base)
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1,_f32,,)(pg, base);
+}
+
+svfloat64_t test_svldnt1_f64(svbool_t pg, const float64_t *base)
+{
+  // CHECK-LABEL: test_svldnt1_f64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %base)
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1,_f64,,)(pg, base);
+}
+
+svint8_t test_svldnt1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnt1_vnum_s8
+  // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pg, i8* %[[GEP]])
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1_vnum,_s8,,)(pg, base, vnum);
+}
+
+svint16_t test_svldnt1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnt1_vnum_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1_vnum,_s16,,)(pg, base, vnum);
+}
+
+svint32_t test_svldnt1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnt1_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1_vnum,_s32,,)(pg, base, vnum);
+}
+
+svint64_t test_svldnt1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnt1_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1_vnum,_s64,,)(pg, base, vnum);
+}
+
+svuint8_t test_svldnt1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnt1_vnum_u8
+  // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pg, i8* %[[GEP]])
+  // CHECK: ret <vscale x 16 x i8> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1_vnum,_u8,,)(pg, base, vnum);
+}
+
+svuint16_t test_svldnt1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnt1_vnum_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK: ret <vscale x 8 x i16> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1_vnum,_u16,,)(pg, base, vnum);
+}
+
+svuint32_t test_svldnt1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnt1_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1_vnum,_u32,,)(pg, base, vnum);
+}
+
+svuint64_t test_svldnt1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnt1_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1_vnum,_u64,,)(pg, base, vnum);
+}
+
+svfloat16_t test_svldnt1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnt1_vnum_f16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to <vscale x 8 x half>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %[[PG]], half* %[[GEP]])
+  // CHECK: ret <vscale x 8 x half> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1_vnum,_f16,,)(pg, base, vnum);
+}
+
+svfloat32_t test_svldnt1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnt1_vnum_f32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to <vscale x 4 x float>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %[[GEP]])
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1_vnum,_f32,,)(pg, base, vnum);
+}
+
+svfloat64_t test_svldnt1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
+{
+  // CHECK-LABEL: test_svldnt1_vnum_f64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to <vscale x 2 x double>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %[[GEP]])
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svldnt1_vnum,_f64,,)(pg, base, vnum);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c
new file mode 100644
index 000000000000..7831aa972315
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c
@@ -0,0 +1,216 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+void test_svst1_s8(svbool_t pg, int8_t *base, svint8_t data)
+{
+  // CHECK-LABEL: test_svst1_s8
+  // CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i8>* %[[BASE]], i32 1, <vscale x 16 x i1> %pg)
+  return SVE_ACLE_FUNC(svst1,_s8,,)(pg, base, data);
+}
+
+void test_svst1_s16(svbool_t pg, int16_t *base, svint16_t data)
+{
+  // CHECK-LABEL: test_svst1_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i16>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1,_s16,,)(pg, base, data);
+}
+
+void test_svst1_s32(svbool_t pg, int32_t *base, svint32_t data)
+{
+  // CHECK-LABEL: test_svst1_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1,_s32,,)(pg, base, data);
+}
+
+void test_svst1_s64(svbool_t pg, int64_t *base, svint64_t data)
+{
+  // CHECK-LABEL: test_svst1_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1,_s64,,)(pg, base, data);
+}
+
+void test_svst1_u8(svbool_t pg, uint8_t *base, svuint8_t data)
+{
+  // CHECK-LABEL: test_svst1_u8
+  // CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i8>* %[[BASE]], i32 1, <vscale x 16 x i1> %pg)
+  return SVE_ACLE_FUNC(svst1,_u8,,)(pg, base, data);
+}
+
+void test_svst1_u16(svbool_t pg, uint16_t *base, svuint16_t data)
+{
+  // CHECK-LABEL: test_svst1_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i16>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1,_u16,,)(pg, base, data);
+}
+
+void test_svst1_u32(svbool_t pg, uint32_t *base, svuint32_t data)
+{
+  // CHECK-LABEL: test_svst1_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1,_u32,,)(pg, base, data);
+}
+
+void test_svst1_u64(svbool_t pg, uint64_t *base, svuint64_t data)
+{
+  // CHECK-LABEL: test_svst1_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1,_u64,,)(pg, base, data);
+}
+
+void test_svst1_f16(svbool_t pg, float16_t *base, svfloat16_t data)
+{
+  // CHECK-LABEL: test_svst1_f16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast half* %base to <vscale x 8 x half>*
+  // CHECK: call void @llvm.masked.store.nxv8f16.p0nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x half>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1,_f16,,)(pg, base, data);
+}
+
+void test_svst1_f32(svbool_t pg, float32_t *base, svfloat32_t data)
+{
+  // CHECK-LABEL: test_svst1_f32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast float* %base to <vscale x 4 x float>*
+  // CHECK: call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x float>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1,_f32,,)(pg, base, data);
+}
+
+void test_svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data)
+{
+  // CHECK-LABEL: test_svst1_f64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast double* %base to <vscale x 2 x double>*
+  // CHECK: call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1,_f64,,)(pg, base, data);
+}
+
+void test_svst1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data)
+{
+  // CHECK-LABEL: test_svst1_vnum_s8
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i8>* %[[GEP]], i32 1, <vscale x 16 x i1> %pg)
+  return SVE_ACLE_FUNC(svst1_vnum,_s8,,)(pg, base, vnum, data);
+}
+
+void test_svst1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data)
+{
+  // CHECK-LABEL: test_svst1_vnum_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i16>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1_vnum,_s16,,)(pg, base, vnum, data);
+}
+
+void test_svst1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data)
+{
+  // CHECK-LABEL: test_svst1_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1_vnum,_s32,,)(pg, base, vnum, data);
+}
+
+void test_svst1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data)
+{
+  // CHECK-LABEL: test_svst1_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1_vnum,_s64,,)(pg, base, vnum, data);
+}
+
+void test_svst1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data)
+{
+  // CHECK-LABEL: test_svst1_vnum_u8
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i8>* %[[GEP]], i32 1, <vscale x 16 x i1> %pg)
+  return SVE_ACLE_FUNC(svst1_vnum,_u8,,)(pg, base, vnum, data);
+}
+
+void test_svst1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data)
+{
+  // CHECK-LABEL: test_svst1_vnum_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i16>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1_vnum,_u16,,)(pg, base, vnum, data);
+}
+
+void test_svst1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data)
+{
+  // CHECK-LABEL: test_svst1_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1_vnum,_u32,,)(pg, base, vnum, data);
+}
+
+void test_svst1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data)
+{
+  // CHECK-LABEL: test_svst1_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1_vnum,_u64,,)(pg, base, vnum, data);
+}
+
+void test_svst1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data)
+{
+  // CHECK-LABEL: test_svst1_vnum_f16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast half* %base to <vscale x 8 x half>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv8f16.p0nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x half>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1_vnum,_f16,,)(pg, base, vnum, data);
+}
+
+void test_svst1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data)
+{
+  // CHECK-LABEL: test_svst1_vnum_f32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast float* %base to <vscale x 4 x float>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x float>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1_vnum,_f32,,)(pg, base, vnum, data);
+}
+
+void test_svst1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data)
+{
+  // CHECK-LABEL: test_svst1_vnum_f64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast double* %base to <vscale x 2 x double>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]])
+  return SVE_ACLE_FUNC(svst1_vnum,_f64,,)(pg, base, vnum, data);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c
new file mode 100644
index 000000000000..b6ac46b8f852
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c
@@ -0,0 +1,149 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+void test_svst1b_s16(svbool_t pg, int8_t *base, svint16_t data)
+{
+  // CHECK-LABEL: test_svst1b_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8>
+  // CHECK: call void @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> %[[DATA]], <vscale x 8 x i8>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1b,_s16,,)(pg, base, data);
+}
+
+void test_svst1b_s32(svbool_t pg, int8_t *base, svint32_t data)
+{
+  // CHECK-LABEL: test_svst1b_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
+  // CHECK: call void @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> %[[DATA]], <vscale x 4 x i8>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1b,_s32,,)(pg, base, data);
+}
+
+void test_svst1b_s64(svbool_t pg, int8_t *base, svint64_t data)
+{
+  // CHECK-LABEL: test_svst1b_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
+  // CHECK: call void @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> %[[DATA]], <vscale x 2 x i8>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1b,_s64,,)(pg, base, data);
+}
+
+void test_svst1b_u16(svbool_t pg, uint8_t *base, svuint16_t data)
+{
+  // CHECK-LABEL: test_svst1b_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8>
+  // CHECK: call void @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> %[[DATA]], <vscale x 8 x i8>* %[[BASE]], i32 1, <vscale x 8 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1b,_u16,,)(pg, base, data);
+}
+
+void test_svst1b_u32(svbool_t pg, uint8_t *base, svuint32_t data)
+{
+  // CHECK-LABEL: test_svst1b_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
+  // CHECK: call void @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> %[[DATA]], <vscale x 4 x i8>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1b,_u32,,)(pg, base, data);
+}
+
+void test_svst1b_u64(svbool_t pg, uint8_t *base, svuint64_t data)
+{
+  // CHECK-LABEL: test_svst1b_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
+  // CHECK: call void @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> %[[DATA]], <vscale x 2 x i8>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1b,_u64,,)(pg, base, data);
+}
+
+void test_svst1b_vnum_s16(svbool_t pg, int8_t *base, int64_t vnum, svint16_t data)
+{
+  // CHECK-LABEL: test_svst1b_vnum_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8>
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> %[[DATA]], <vscale x 8 x i8>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1b_vnum,_s16,,)(pg, base, vnum, data);
+}
+
+void test_svst1b_vnum_s32(svbool_t pg, int8_t *base, int64_t vnum, svint32_t data)
+{
+  // CHECK-LABEL: test_svst1b_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> %[[DATA]], <vscale x 4 x i8>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1b_vnum,_s32,,)(pg, base, vnum, data);
+}
+
+void test_svst1b_vnum_s64(svbool_t pg, int8_t *base, int64_t vnum, svint64_t data)
+{
+  // CHECK-LABEL: test_svst1b_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> %[[DATA]], <vscale x 2 x i8>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1b_vnum,_s64,,)(pg, base, vnum, data);
+}
+
+void test_svst1b_vnum_u16(svbool_t pg, uint8_t *base, int64_t vnum, svuint16_t data)
+{
+  // CHECK-LABEL: test_svst1b_vnum_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 8 x i8>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8>
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> %[[DATA]], <vscale x 8 x i8>* %[[GEP]], i32 1, <vscale x 8 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1b_vnum,_u16,,)(pg, base, vnum, data);
+}
+
+void test_svst1b_vnum_u32(svbool_t pg, uint8_t *base, int64_t vnum, svuint32_t data)
+{
+  // CHECK-LABEL: test_svst1b_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 4 x i8>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> %[[DATA]], <vscale x 4 x i8>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1b_vnum,_u32,,)(pg, base, vnum, data);
+}
+
+void test_svst1b_vnum_u64(svbool_t pg, uint8_t *base, int64_t vnum, svuint64_t data)
+{
+  // CHECK-LABEL: test_svst1b_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i8>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> %[[DATA]], <vscale x 2 x i8>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1b_vnum,_u64,,)(pg, base, vnum, data);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c
new file mode 100644
index 000000000000..a724db4d7103
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c
@@ -0,0 +1,103 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+void test_svst1h_s32(svbool_t pg, int16_t *base, svint32_t data)
+{
+  // CHECK-LABEL: test_svst1h_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
+  // CHECK: call void @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> %[[DATA]], <vscale x 4 x i16>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1h,_s32,,)(pg, base, data);
+}
+
+void test_svst1h_s64(svbool_t pg, int16_t *base, svint64_t data)
+{
+  // CHECK-LABEL: test_svst1h_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK: call void @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> %[[DATA]], <vscale x 2 x i16>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1h,_s64,,)(pg, base, data);
+}
+
+void test_svst1h_u32(svbool_t pg, uint16_t *base, svuint32_t data)
+{
+  // CHECK-LABEL: test_svst1h_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
+  // CHECK: call void @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> %[[DATA]], <vscale x 4 x i16>* %[[BASE]], i32 1, <vscale x 4 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1h,_u32,,)(pg, base, data);
+}
+
+void test_svst1h_u64(svbool_t pg, uint16_t *base, svuint64_t data)
+{
+  // CHECK-LABEL: test_svst1h_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK: call void @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> %[[DATA]], <vscale x 2 x i16>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1h,_u64,,)(pg, base, data);
+}
+
+void test_svst1h_vnum_s32(svbool_t pg, int16_t *base, int64_t vnum, svint32_t data)
+{
+  // CHECK-LABEL: test_svst1h_vnum_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> %[[DATA]], <vscale x 4 x i16>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1h_vnum,_s32,,)(pg, base, vnum, data);
+}
+
+void test_svst1h_vnum_s64(svbool_t pg, int16_t *base, int64_t vnum, svint64_t data)
+{
+  // CHECK-LABEL: test_svst1h_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> %[[DATA]], <vscale x 2 x i16>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1h_vnum,_s64,,)(pg, base, vnum, data);
+}
+
+void test_svst1h_vnum_u32(svbool_t pg, uint16_t *base, int64_t vnum, svuint32_t data)
+{
+  // CHECK-LABEL: test_svst1h_vnum_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 4 x i16>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> %[[DATA]], <vscale x 4 x i16>* %[[GEP]], i32 1, <vscale x 4 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1h_vnum,_u32,,)(pg, base, vnum, data);
+}
+
+void test_svst1h_vnum_u64(svbool_t pg, uint16_t *base, int64_t vnum, svuint64_t data)
+{
+  // CHECK-LABEL: test_svst1h_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 2 x i16>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> %[[DATA]], <vscale x 2 x i16>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1h_vnum,_u64,,)(pg, base, vnum, data);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c
new file mode 100644
index 000000000000..c0d5c7b90e13
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+void test_svst1w_s64(svbool_t pg, int32_t *base, svint64_t data)
+{
+  // CHECK-LABEL: test_svst1w_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK: call void @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> %[[DATA]], <vscale x 2 x i32>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1w,_s64,,)(pg, base, data);
+}
+
+void test_svst1w_u64(svbool_t pg, uint32_t *base, svuint64_t data)
+{
+  // CHECK-LABEL: test_svst1w_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK: call void @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> %[[DATA]], <vscale x 2 x i32>* %[[BASE]], i32 1, <vscale x 2 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1w,_u64,,)(pg, base, data);
+}
+
+void test_svst1w_vnum_s64(svbool_t pg, int32_t *base, int64_t vnum, svint64_t data)
+{
+  // CHECK-LABEL: test_svst1w_vnum_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> %[[DATA]], <vscale x 2 x i32>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1w_vnum,_s64,,)(pg, base, vnum, data);
+}
+
+void test_svst1w_vnum_u64(svbool_t pg, uint32_t *base, int64_t vnum, svuint64_t data)
+{
+  // CHECK-LABEL: test_svst1w_vnum_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 2 x i32>*
+  // CHECK-DAG: %[[DATA:.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %[[BASE]], i64 %vnum
+  // CHECK: call void @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> %[[DATA]], <vscale x 2 x i32>* %[[GEP]], i32 1, <vscale x 2 x i1> %[[PG]])
+  // CHECK: ret void
+  return SVE_ACLE_FUNC(svst1w_vnum,_u64,,)(pg, base, vnum, data);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c
new file mode 100644
index 000000000000..7924dfa63651
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c
@@ -0,0 +1,227 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+void test_svstnt1_s8(svbool_t pg, int8_t *base, svint8_t data)
+{
+  // CHECK-LABEL: test_svstnt1_s8
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %base)
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1,_s8,,)(pg, base, data);
+}
+
+void test_svstnt1_s16(svbool_t pg, int16_t *base, svint16_t data)
+{
+  // CHECK-LABEL: test_svstnt1_s16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %[[PG]], i16* %base)
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1,_s16,,)(pg, base, data);
+}
+
+void test_svstnt1_s32(svbool_t pg, int32_t *base, svint32_t data)
+{
+  // CHECK-LABEL: test_svstnt1_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %[[PG]], i32* %base)
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1,_s32,,)(pg, base, data);
+}
+
+void test_svstnt1_s64(svbool_t pg, int64_t *base, svint64_t data)
+{
+  // CHECK-LABEL: test_svstnt1_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %[[PG]], i64* %base)
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1,_s64,,)(pg, base, data);
+}
+
+void test_svstnt1_u8(svbool_t pg, uint8_t *base, svuint8_t data)
+{
+  // CHECK-LABEL: test_svstnt1_u8
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %base)
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1,_u8,,)(pg, base, data);
+}
+
+void test_svstnt1_u16(svbool_t pg, uint16_t *base, svuint16_t data)
+{
+  // CHECK-LABEL: test_svstnt1_u16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %[[PG]], i16* %base)
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1,_u16,,)(pg, base, data);
+}
+
+void test_svstnt1_u32(svbool_t pg, uint32_t *base, svuint32_t data)
+{
+  // CHECK-LABEL: test_svstnt1_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %[[PG]], i32* %base)
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1,_u32,,)(pg, base, data);
+}
+
+void test_svstnt1_u64(svbool_t pg, uint64_t *base, svuint64_t data)
+{
+  // CHECK-LABEL: test_svstnt1_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %[[PG]], i64* %base)
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1,_u64,,)(pg, base, data);
+}
+
+void test_svstnt1_f16(svbool_t pg, float16_t *base, svfloat16_t data)
+{
+  // CHECK-LABEL: test_svstnt1_f16
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %[[PG]], half* %base)
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1,_f16,,)(pg, base, data);
+}
+
+void test_svstnt1_f32(svbool_t pg, float32_t *base, svfloat32_t data)
+{
+  // CHECK-LABEL: test_svstnt1_f32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %[[PG]], float* %base)
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1,_f32,,)(pg, base, data);
+}
+
+void test_svstnt1_f64(svbool_t pg, float64_t *base, svfloat64_t data)
+{
+  // CHECK-LABEL: test_svstnt1_f64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %[[PG]], double* %base)
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1,_f64,,)(pg, base, data);
+}
+
+void test_svstnt1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data)
+{
+  // CHECK-LABEL: test_svstnt1_vnum_s8
+  // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %[[GEP]])
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1_vnum,_s8,,)(pg, base, vnum, data);
+}
+
+void test_svstnt1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data)
+{
+  // CHECK-LABEL: test_svstnt1_vnum_s16
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1_vnum,_s16,,)(pg, base, vnum, data);
+}
+
+void test_svstnt1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data)
+{
+  // CHECK-LABEL: test_svstnt1_vnum_s32
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1_vnum,_s32,,)(pg, base, vnum, data);
+}
+
+void test_svstnt1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data)
+{
+  // CHECK-LABEL: test_svstnt1_vnum_s64
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1_vnum,_s64,,)(pg, base, vnum, data);
+}
+
+void test_svstnt1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data)
+{
+  // CHECK-LABEL: test_svstnt1_vnum_u8
+  // CHECK: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
+  // CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %[[GEP]])
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1_vnum,_u8,,)(pg, base, vnum, data);
+}
+
+void test_svstnt1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data)
+{
+  // CHECK-LABEL: test_svstnt1_vnum_u16
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1_vnum,_u16,,)(pg, base, vnum, data);
+}
+
+void test_svstnt1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data)
+{
+  // CHECK-LABEL: test_svstnt1_vnum_u32
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1_vnum,_u32,,)(pg, base, vnum, data);
+}
+
+void test_svstnt1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data)
+{
+  // CHECK-LABEL: test_svstnt1_vnum_u64
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1_vnum,_u64,,)(pg, base, vnum, data);
+}
+
+void test_svstnt1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data)
+{
+  // CHECK-LABEL: test_svstnt1_vnum_f16
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to <vscale x 8 x half>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %[[PG]], half* %[[GEP]])
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1_vnum,_f16,,)(pg, base, vnum, data);
+}
+
+void test_svstnt1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data)
+{
+  // CHECK-LABEL: test_svstnt1_vnum_f32
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to <vscale x 4 x float>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %[[PG]], float* %[[GEP]])
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1_vnum,_f32,,)(pg, base, vnum, data);
+}
+
+void test_svstnt1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data)
+{
+  // CHECK-LABEL: test_svstnt1_vnum_f64
+  // CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to <vscale x 2 x double>*
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BITCAST]], i64 %vnum, i64 0
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %[[PG]], double* %[[GEP]])
+  // CHECK-NEXT: ret
+  return SVE_ACLE_FUNC(svstnt1_vnum,_f64,,)(pg, base, vnum, data);
+}

diff  --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index a792530a03af..831dfa804ca0 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -442,6 +442,97 @@ void SVEType::applyModifier(char Mod) {
     Bitwidth = 16;
     ElementBitwidth = 1;
     break;
+  case 'l':
+    Predicate = false;
+    Signed = true;
+    Float = false;
+    ElementBitwidth = Bitwidth = 64;
+    NumVectors = 0;
+    break;
+  case 'S':
+    Constant = true;
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 8;
+    NumVectors = 0;
+    Signed = true;
+    break;
+  case 'W':
+    Constant = true;
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 8;
+    NumVectors = 0;
+    Signed = false;
+    break;
+  case 'T':
+    Constant = true;
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 16;
+    NumVectors = 0;
+    Signed = true;
+    break;
+  case 'X':
+    Constant = true;
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 16;
+    NumVectors = 0;
+    Signed = false;
+    break;
+  case 'Y':
+    Constant = true;
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 32;
+    NumVectors = 0;
+    Signed = false;
+    break;
+  case 'U':
+    Constant = true;
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 32;
+    NumVectors = 0;
+    Signed = true;
+    break;
+  case 'A':
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 8;
+    NumVectors = 0;
+    Signed = true;
+    break;
+  case 'B':
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 16;
+    NumVectors = 0;
+    Signed = true;
+    break;
+  case 'C':
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 32;
+    NumVectors = 0;
+    Signed = true;
+    break;
+  case 'D':
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 64;
+    NumVectors = 0;
+    Signed = true;
+    break;
+  case 'E':
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 8;
+    NumVectors = 0;
+    Signed = false;
+    break;
+  case 'F':
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 16;
+    NumVectors = 0;
+    Signed = false;
+    break;
+  case 'G':
+    Pointer = true;
+    ElementBitwidth = Bitwidth = 32;
+    NumVectors = 0;
+    Signed = false;
+    break;
   default:
     llvm_unreachable("Unhandled character!");
   }


        


More information about the cfe-commits mailing list