[clang] 72f5658 - [SveEmitter] Implement builtins for gathers/scatters

Andrzej Warzynski via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 22 05:24:42 PDT 2020


Author: Andrzej Warzynski
Date: 2020-04-22T13:21:39+01:00
New Revision: 72f565899dd4bf1fcb183555ba9089b7452b3f8a

URL: https://github.com/llvm/llvm-project/commit/72f565899dd4bf1fcb183555ba9089b7452b3f8a
DIFF: https://github.com/llvm/llvm-project/commit/72f565899dd4bf1fcb183555ba9089b7452b3f8a.diff

LOG: [SveEmitter] Implement builtins for gathers/scatters

This patch adds builtins for:
  * regular, first-faulting and non-temporal gather loads
  * regular and non-temporal scatter stores

Differential Revision: https://reviews.llvm.org/D77735
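
As a rough illustration of what these builtins enable at the source level,
here is a minimal sketch of user code (assuming <arm_sve.h>, a target with
SVE, and SVE2 for the non-temporal forms; the function names are made up
for the example):

  #include <arm_sve.h>

  // Gather: for each active lane of pg, load one 64-bit element from
  // base + offsets[i] bytes.
  svint64_t gather_bytes(svbool_t pg, const int64_t *base, svint64_t offsets) {
    return svld1_gather_s64offset_s64(pg, base, offsets);
  }

  // Scatter: for each active lane of pg, store data[i] to
  // base + indices[i] * sizeof(int64_t).
  void scatter_indexed(svbool_t pg, int64_t *base, svint64_t indices,
                       svint64_t data) {
    svst1_scatter_s64index_s64(pg, base, indices, data);
  }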

Added: 
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sb.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sh.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sw.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1ub.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uh.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uw.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1b.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1h.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1w.c

Modified: 
    clang/include/clang/Basic/TargetBuiltins.h
    clang/include/clang/Basic/arm_sve.td
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/CodeGen/CodeGenFunction.h
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c
    clang/utils/TableGen/SveEmitter.cpp

Removed: 
    


################################################################################
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 0821926a6881..0533b1291664 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -222,6 +222,7 @@ namespace clang {
     bool isStructLoad() const { return Flags & IsStructLoad; }
     bool isStructStore() const { return Flags & IsStructStore; }
     bool isZExtReturn() const { return Flags & IsZExtReturn; }
+    bool isByteIndexed() const { return Flags & IsByteIndexed; }
 
     uint64_t getBits() const { return Flags; }
     bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index cd01662b0858..88ea0d59b394 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -58,6 +58,7 @@
 // -------------------
 // prototype: return (arg, arg, ...)
 //
+// x: vector of signed integers
 // u: vector of unsigned integers
 // d: default
 // c: const pointer type
@@ -155,6 +156,9 @@ def IsScatterStore            : FlagType<0x00010000>;
 def IsStructLoad              : FlagType<0x00020000>;
 def IsStructStore             : FlagType<0x00040000>;
 def IsZExtReturn              : FlagType<0x00080000>; // Return value is sign-extended by default
+//      :                                     :
+//      :                                     :
+def IsByteIndexed             : FlagType<0x02000000>;
 
 // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
 class ImmCheckType<int val> {
@@ -226,6 +230,84 @@ def SVLD1UH_VNUM : MInst<"svld1uh_vnum_{d}", "dPXl", "ilUiUl",          [IsLoad,
 def SVLD1SW_VNUM : MInst<"svld1sw_vnum_{d}", "dPUl", "lUl",             [IsLoad],               MemEltTyInt32>;
 def SVLD1UW_VNUM : MInst<"svld1uw_vnum_{d}", "dPYl", "lUl",             [IsLoad, IsZExtReturn], MemEltTyInt32>;
 
+// Load one vector (vector base)
+def SVLD1_GATHER_BASES_U   : MInst<"svld1_gather[_{2}base]_{d}",   "dPu", "ilUiUlfd", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1SB_GATHER_BASES_U : MInst<"svld1sb_gather[_{2}base]_{d}", "dPu", "ilUiUl",   [IsGatherLoad],               MemEltTyInt8,    "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1UB_GATHER_BASES_U : MInst<"svld1ub_gather[_{2}base]_{d}", "dPu", "ilUiUl",   [IsGatherLoad, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1SH_GATHER_BASES_U : MInst<"svld1sh_gather[_{2}base]_{d}", "dPu", "ilUiUl",   [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1UH_GATHER_BASES_U : MInst<"svld1uh_gather[_{2}base]_{d}", "dPu", "ilUiUl",   [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1SW_GATHER_BASES_U : MInst<"svld1sw_gather[_{2}base]_{d}", "dPu", "lUl",      [IsGatherLoad],               MemEltTyInt32,   "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1UW_GATHER_BASES_U : MInst<"svld1uw_gather[_{2}base]_{d}", "dPu", "lUl",      [IsGatherLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ld1_gather_scalar_offset">;
+
+// Load one vector (scalar base, signed vector offset in bytes)
+def SVLD1_GATHER_64B_OFFSETS_S   : MInst<"svld1_gather_[{3}]offset[_{d}]", "dPcx", "lUld", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ld1_gather">;
+def SVLD1SB_GATHER_64B_OFFSETS_S : MInst<"svld1sb_gather_[{3}]offset_{d}", "dPSx", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ld1_gather">;
+def SVLD1UB_GATHER_64B_OFFSETS_S : MInst<"svld1ub_gather_[{3}]offset_{d}", "dPWx", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ld1_gather">;
+def SVLD1SH_GATHER_64B_OFFSETS_S : MInst<"svld1sh_gather_[{3}]offset_{d}", "dPTx", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ld1_gather">;
+def SVLD1UH_GATHER_64B_OFFSETS_S : MInst<"svld1uh_gather_[{3}]offset_{d}", "dPXx", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ld1_gather">;
+def SVLD1SW_GATHER_64B_OFFSETS_S : MInst<"svld1sw_gather_[{3}]offset_{d}", "dPUx", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt32,   "aarch64_sve_ld1_gather">;
+def SVLD1UW_GATHER_64B_OFFSETS_S : MInst<"svld1uw_gather_[{3}]offset_{d}", "dPYx", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ld1_gather">;
+
+def SVLD1_GATHER_32B_OFFSETS_S   : MInst<"svld1_gather_[{3}]offset[_{d}]", "dPcx", "iUif", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ld1_gather_sxtw">;
+def SVLD1SB_GATHER_32B_OFFSETS_S : MInst<"svld1sb_gather_[{3}]offset_{d}", "dPSx", "iUi",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ld1_gather_sxtw">;
+def SVLD1UB_GATHER_32B_OFFSETS_S : MInst<"svld1ub_gather_[{3}]offset_{d}", "dPWx", "iUi",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ld1_gather_sxtw">;
+def SVLD1SH_GATHER_32B_OFFSETS_S : MInst<"svld1sh_gather_[{3}]offset_{d}", "dPTx", "iUi",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ld1_gather_sxtw">;
+def SVLD1UH_GATHER_32B_OFFSETS_S : MInst<"svld1uh_gather_[{3}]offset_{d}", "dPXx", "iUi",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ld1_gather_sxtw">;
+
+// Load one vector (scalar base, unsigned vector offset in bytes)
+def SVLD1_GATHER_64B_OFFSETS_U   : MInst<"svld1_gather_[{3}]offset[_{d}]", "dPcu", "lUld", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ld1_gather">;
+def SVLD1SB_GATHER_64B_OFFSETS_U : MInst<"svld1sb_gather_[{3}]offset_{d}", "dPSu", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ld1_gather">;
+def SVLD1UB_GATHER_64B_OFFSETS_U : MInst<"svld1ub_gather_[{3}]offset_{d}", "dPWu", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ld1_gather">;
+def SVLD1SH_GATHER_64B_OFFSETS_U : MInst<"svld1sh_gather_[{3}]offset_{d}", "dPTu", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ld1_gather">;
+def SVLD1UH_GATHER_64B_OFFSETS_U : MInst<"svld1uh_gather_[{3}]offset_{d}", "dPXu", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ld1_gather">;
+def SVLD1SW_GATHER_64B_OFFSETS_U : MInst<"svld1sw_gather_[{3}]offset_{d}", "dPUu", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt32,   "aarch64_sve_ld1_gather">;
+def SVLD1UW_GATHER_64B_OFFSETS_U : MInst<"svld1uw_gather_[{3}]offset_{d}", "dPYu", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ld1_gather">;
+
+def SVLD1_GATHER_32B_OFFSETS_U   : MInst<"svld1_gather_[{3}]offset[_{d}]", "dPcu", "iUif", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ld1_gather_uxtw">;
+def SVLD1SB_GATHER_32B_OFFSETS_U : MInst<"svld1sb_gather_[{3}]offset_{d}", "dPSu", "iUi",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ld1_gather_uxtw">;
+def SVLD1UB_GATHER_32B_OFFSETS_U : MInst<"svld1ub_gather_[{3}]offset_{d}", "dPWu", "iUi",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ld1_gather_uxtw">;
+def SVLD1SH_GATHER_32B_OFFSETS_U : MInst<"svld1sh_gather_[{3}]offset_{d}", "dPTu", "iUi",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ld1_gather_uxtw">;
+def SVLD1UH_GATHER_32B_OFFSETS_U : MInst<"svld1uh_gather_[{3}]offset_{d}", "dPXu", "iUi",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ld1_gather_uxtw">;
+
+// Load one vector (vector base, signed scalar offset in bytes)
+def SVLD1_GATHER_OFFSET_S   : MInst<"svld1_gather[_{2}base]_offset_{d}",   "dPul", "ilUiUlfd", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1SB_GATHER_OFFSET_S : MInst<"svld1sb_gather[_{2}base]_offset_{d}", "dPul", "ilUiUl",   [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1UB_GATHER_OFFSET_S : MInst<"svld1ub_gather[_{2}base]_offset_{d}", "dPul", "ilUiUl",   [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1SH_GATHER_OFFSET_S : MInst<"svld1sh_gather[_{2}base]_offset_{d}", "dPul", "ilUiUl",   [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1UH_GATHER_OFFSET_S : MInst<"svld1uh_gather[_{2}base]_offset_{d}", "dPul", "ilUiUl",   [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1SW_GATHER_OFFSET_S : MInst<"svld1sw_gather[_{2}base]_offset_{d}", "dPul", "lUl",      [IsGatherLoad, IsByteIndexed],               MemEltTyInt32,   "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1UW_GATHER_OFFSET_S : MInst<"svld1uw_gather[_{2}base]_offset_{d}", "dPul", "lUl",      [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ld1_gather_scalar_offset">;
+
+// Load one vector (scalar base, signed vector index)
+def SVLD1_GATHER_64B_INDICES_S   : MInst<"svld1_gather_[{3}]index[_{d}]", "dPcx", "lUld", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ld1_gather_index">;
+def SVLD1SH_GATHER_64B_INDICES_S : MInst<"svld1sh_gather_[{3}]index_{d}", "dPTx", "lUl",  [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ld1_gather_index">;
+def SVLD1UH_GATHER_64B_INDICES_S : MInst<"svld1uh_gather_[{3}]index_{d}", "dPXx", "lUl",  [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ld1_gather_index">;
+def SVLD1SW_GATHER_64B_INDICES_S : MInst<"svld1sw_gather_[{3}]index_{d}", "dPUx", "lUl",  [IsGatherLoad],               MemEltTyInt32,   "aarch64_sve_ld1_gather_index">;
+def SVLD1UW_GATHER_64B_INDICES_S : MInst<"svld1uw_gather_[{3}]index_{d}", "dPYx", "lUl",  [IsGatherLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ld1_gather_index">;
+
+def SVLD1_GATHER_32B_INDICES_S   : MInst<"svld1_gather_[{3}]index[_{d}]", "dPcx", "iUif", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ld1_gather_sxtw_index">;
+def SVLD1SH_GATHER_32B_INDICES_S : MInst<"svld1sh_gather_[{3}]index_{d}", "dPTx", "iUi",  [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ld1_gather_sxtw_index">;
+def SVLD1UH_GATHER_32B_INDICES_S : MInst<"svld1uh_gather_[{3}]index_{d}", "dPXx", "iUi",  [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ld1_gather_sxtw_index">;
+
+// Load one vector (scalar base, unsigned vector index)
+def SVLD1_GATHER_64B_INDICES_U   : MInst<"svld1_gather_[{3}]index[_{d}]", "dPcu", "lUld", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ld1_gather_index">;
+def SVLD1SH_GATHER_64B_INDICES_U : MInst<"svld1sh_gather_[{3}]index_{d}", "dPTu", "lUl",  [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ld1_gather_index">;
+def SVLD1UH_GATHER_64B_INDICES_U : MInst<"svld1uh_gather_[{3}]index_{d}", "dPXu", "lUl",  [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ld1_gather_index">;
+def SVLD1SW_GATHER_64B_INDICES_U : MInst<"svld1sw_gather_[{3}]index_{d}", "dPUu", "lUl",  [IsGatherLoad],               MemEltTyInt32,   "aarch64_sve_ld1_gather_index">;
+def SVLD1UW_GATHER_64B_INDICES_U : MInst<"svld1uw_gather_[{3}]index_{d}", "dPYu", "lUl",  [IsGatherLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ld1_gather_index">;
+
+def SVLD1_GATHER_32B_INDICES_U   : MInst<"svld1_gather_[{3}]index[_{d}]", "dPcu", "iUif", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ld1_gather_uxtw_index">;
+def SVLD1SH_GATHER_32B_INDICES_U : MInst<"svld1sh_gather_[{3}]index_{d}", "dPTu", "iUi",  [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ld1_gather_uxtw_index">;
+def SVLD1UH_GATHER_32B_INDICES_U : MInst<"svld1uh_gather_[{3}]index_{d}", "dPXu", "iUi",  [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ld1_gather_uxtw_index">;
+
+// Load one vector (vector base, signed scalar index)
+def SVLD1_GATHER_INDEX_S     : MInst<"svld1_gather[_{2}base]_index_{d}",   "dPul", "ilUiUlfd", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1SH_GATHER_INDEX_S   : MInst<"svld1sh_gather[_{2}base]_index_{d}", "dPul", "ilUiUl",   [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1UH_GATHER_INDEX_S   : MInst<"svld1uh_gather[_{2}base]_index_{d}", "dPul", "ilUiUl",   [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1SW_GATHER_INDEX_S   : MInst<"svld1sw_gather[_{2}base]_index_{d}", "dPul", "lUl",      [IsGatherLoad],               MemEltTyInt32,   "aarch64_sve_ld1_gather_scalar_offset">;
+def SVLD1UW_GATHER_INDEX_S   : MInst<"svld1uw_gather[_{2}base]_index_{d}", "dPul", "lUl",      [IsGatherLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ld1_gather_scalar_offset">;
+
+
 // First-faulting load one vector (scalar base)
 def SVLDFF1   : MInst<"svldff1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad],               MemEltTyDefault, "aarch64_sve_ldff1">;
 def SVLDFF1SB : MInst<"svldff1sb_{d}", "dPS", "silUsUiUl",       [IsLoad],               MemEltTyInt8,    "aarch64_sve_ldff1">;
@@ -244,6 +326,83 @@ def SVLDFF1UH_VNUM : MInst<"svldff1uh_vnum_{d}", "dPXl", "ilUiUl",          [IsL
 def SVLDFF1SW_VNUM : MInst<"svldff1sw_vnum_{d}", "dPUl", "lUl",             [IsLoad],               MemEltTyInt32,   "aarch64_sve_ldff1">;
 def SVLDFF1UW_VNUM : MInst<"svldff1uw_vnum_{d}", "dPYl", "lUl",             [IsLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldff1">;
 
+// First-faulting load one vector (vector base)
+def SVLDFF1_GATHER_BASES_U   : MInst<"svldff1_gather[_{2}base]_{d}",   "dPu", "ilUiUlfd", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1SB_GATHER_BASES_U : MInst<"svldff1sb_gather[_{2}base]_{d}", "dPu", "ilUiUl",   [IsGatherLoad],               MemEltTyInt8,    "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1UB_GATHER_BASES_U : MInst<"svldff1ub_gather[_{2}base]_{d}", "dPu", "ilUiUl",   [IsGatherLoad, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1SH_GATHER_BASES_U : MInst<"svldff1sh_gather[_{2}base]_{d}", "dPu", "ilUiUl",   [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1UH_GATHER_BASES_U : MInst<"svldff1uh_gather[_{2}base]_{d}", "dPu", "ilUiUl",   [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1SW_GATHER_BASES_U : MInst<"svldff1sw_gather[_{2}base]_{d}", "dPu", "lUl",      [IsGatherLoad],               MemEltTyInt32,   "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1UW_GATHER_BASES_U : MInst<"svldff1uw_gather[_{2}base]_{d}", "dPu", "lUl",      [IsGatherLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldff1_gather_scalar_offset">;
+
+// First-faulting load one vector (scalar base, signed vector offset in bytes)
+def SVLDFF1_GATHER_64B_OFFSETS_S   : MInst<"svldff1_gather_[{3}]offset[_{d}]", "dPcx", "lUld", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ldff1_gather">;
+def SVLDFF1SB_GATHER_64B_OFFSETS_S : MInst<"svldff1sb_gather_[{3}]offset_{d}", "dPSx", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ldff1_gather">;
+def SVLDFF1UB_GATHER_64B_OFFSETS_S : MInst<"svldff1ub_gather_[{3}]offset_{d}", "dPWx", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldff1_gather">;
+def SVLDFF1SH_GATHER_64B_OFFSETS_S : MInst<"svldff1sh_gather_[{3}]offset_{d}", "dPTx", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ldff1_gather">;
+def SVLDFF1UH_GATHER_64B_OFFSETS_S : MInst<"svldff1uh_gather_[{3}]offset_{d}", "dPXx", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1_gather">;
+def SVLDFF1SW_GATHER_64B_OFFSETS_S : MInst<"svldff1sw_gather_[{3}]offset_{d}", "dPUx", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt32,   "aarch64_sve_ldff1_gather">;
+def SVLDFF1UW_GATHER_64B_OFFSETS_S : MInst<"svldff1uw_gather_[{3}]offset_{d}", "dPYx", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldff1_gather">;
+
+def SVLDFF1_GATHER_32B_OFFSETS_S   : MInst<"svldff1_gather_[{3}]offset[_{d}]", "dPcx", "iUif", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ldff1_gather_sxtw">;
+def SVLDFF1SB_GATHER_32B_OFFSETS_S : MInst<"svldff1sb_gather_[{3}]offset_{d}", "dPSx", "iUi",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ldff1_gather_sxtw">;
+def SVLDFF1UB_GATHER_32B_OFFSETS_S : MInst<"svldff1ub_gather_[{3}]offset_{d}", "dPWx", "iUi",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldff1_gather_sxtw">;
+def SVLDFF1SH_GATHER_32B_OFFSETS_S : MInst<"svldff1sh_gather_[{3}]offset_{d}", "dPTx", "iUi",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ldff1_gather_sxtw">;
+def SVLDFF1UH_GATHER_32B_OFFSETS_S : MInst<"svldff1uh_gather_[{3}]offset_{d}", "dPXx", "iUi",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1_gather_sxtw">;
+
+// First-faulting load one vector (scalar base, unsigned vector offset in bytes)
+def SVLDFF1_GATHER_64B_OFFSETS_U   : MInst<"svldff1_gather_[{3}]offset[_{d}]", "dPcu", "lUld", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ldff1_gather">;
+def SVLDFF1SB_GATHER_64B_OFFSETS_U : MInst<"svldff1sb_gather_[{3}]offset_{d}", "dPSu", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ldff1_gather">;
+def SVLDFF1UB_GATHER_64B_OFFSETS_U : MInst<"svldff1ub_gather_[{3}]offset_{d}", "dPWu", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldff1_gather">;
+def SVLDFF1SH_GATHER_64B_OFFSETS_U : MInst<"svldff1sh_gather_[{3}]offset_{d}", "dPTu", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ldff1_gather">;
+def SVLDFF1UH_GATHER_64B_OFFSETS_U : MInst<"svldff1uh_gather_[{3}]offset_{d}", "dPXu", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1_gather">;
+def SVLDFF1SW_GATHER_64B_OFFSETS_U : MInst<"svldff1sw_gather_[{3}]offset_{d}", "dPUu", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt32,   "aarch64_sve_ldff1_gather">;
+def SVLDFF1UW_GATHER_64B_OFFSETS_U : MInst<"svldff1uw_gather_[{3}]offset_{d}", "dPYu", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldff1_gather">;
+
+def SVLDFF1_GATHER_32B_OFFSETS_U   : MInst<"svldff1_gather_[{3}]offset[_{d}]", "dPcu", "iUif", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ldff1_gather_uxtw">;
+def SVLDFF1SB_GATHER_32B_OFFSETS_U : MInst<"svldff1sb_gather_[{3}]offset_{d}", "dPSu", "iUi",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ldff1_gather_uxtw">;
+def SVLDFF1UB_GATHER_32B_OFFSETS_U : MInst<"svldff1ub_gather_[{3}]offset_{d}", "dPWu", "iUi",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldff1_gather_uxtw">;
+def SVLDFF1SH_GATHER_32B_OFFSETS_U : MInst<"svldff1sh_gather_[{3}]offset_{d}", "dPTu", "iUi",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ldff1_gather_uxtw">;
+def SVLDFF1UH_GATHER_32B_OFFSETS_U : MInst<"svldff1uh_gather_[{3}]offset_{d}", "dPXu", "iUi",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1_gather_uxtw">;
+
+// First-faulting load one vector (vector base, signed scalar offset in bytes)
+def SVLDFF1_GATHER_OFFSET_S   : MInst<"svldff1_gather[_{2}base]_offset_{d}",   "dPul", "ilUiUlfd", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1SB_GATHER_OFFSET_S : MInst<"svldff1sb_gather[_{2}base]_offset_{d}", "dPul", "ilUiUl",   [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1UB_GATHER_OFFSET_S : MInst<"svldff1ub_gather[_{2}base]_offset_{d}", "dPul", "ilUiUl",   [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1SH_GATHER_OFFSET_S : MInst<"svldff1sh_gather[_{2}base]_offset_{d}", "dPul", "ilUiUl",   [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1UH_GATHER_OFFSET_S : MInst<"svldff1uh_gather[_{2}base]_offset_{d}", "dPul", "ilUiUl",   [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1SW_GATHER_OFFSET_S : MInst<"svldff1sw_gather[_{2}base]_offset_{d}", "dPul", "lUl",      [IsGatherLoad, IsByteIndexed],               MemEltTyInt32,   "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1UW_GATHER_OFFSET_S : MInst<"svldff1uw_gather[_{2}base]_offset_{d}", "dPul", "lUl",      [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldff1_gather_scalar_offset">;
+
+// First-faulting load one vector (scalar base, signed vector index)
+def SVLDFF1_GATHER_64B_INDICES_S   : MInst<"svldff1_gather_[{3}]index[_{d}]", "dPcx", "lUld", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ldff1_gather_index">;
+def SVLDFF1SH_GATHER_64B_INDICES_S : MInst<"svldff1sh_gather_[{3}]index_{d}", "dPTx", "lUl",  [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ldff1_gather_index">;
+def SVLDFF1UH_GATHER_64B_INDICES_S : MInst<"svldff1uh_gather_[{3}]index_{d}", "dPXx", "lUl",  [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1_gather_index">;
+def SVLDFF1SW_GATHER_64B_INDICES_S : MInst<"svldff1sw_gather_[{3}]index_{d}", "dPUx", "lUl",  [IsGatherLoad],               MemEltTyInt32,   "aarch64_sve_ldff1_gather_index">;
+def SVLDFF1UW_GATHER_64B_INDICES_S : MInst<"svldff1uw_gather_[{3}]index_{d}", "dPYx", "lUl",  [IsGatherLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldff1_gather_index">;
+
+def SVLDFF1_GATHER_32B_INDICES_S   : MInst<"svldff1_gather_[{3}]index[_{d}]", "dPcx", "iUif", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ldff1_gather_sxtw_index">;
+def SVLDFF1SH_GATHER_32B_INDICES_S : MInst<"svldff1sh_gather_[{3}]index_{d}", "dPTx", "iUi",  [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ldff1_gather_sxtw_index">;
+def SVLDFF1UH_GATHER_32B_INDICES_S : MInst<"svldff1uh_gather_[{3}]index_{d}", "dPXx", "iUi",  [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1_gather_sxtw_index">;
+
+// First-faulting load one vector (scalar base, unsigned vector index)
+def SVLDFF1_GATHER_64B_INDICES_U   : MInst<"svldff1_gather_[{3}]index[_{d}]", "dPcu", "lUld", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ldff1_gather_index">;
+def SVLDFF1SH_GATHER_64B_INDICES_U : MInst<"svldff1sh_gather_[{3}]index_{d}", "dPTu", "lUl",  [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ldff1_gather_index">;
+def SVLDFF1UH_GATHER_64B_INDICES_U : MInst<"svldff1uh_gather_[{3}]index_{d}", "dPXu", "lUl",  [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1_gather_index">;
+def SVLDFF1SW_GATHER_64B_INDICES_U : MInst<"svldff1sw_gather_[{3}]index_{d}", "dPUu", "lUl",  [IsGatherLoad],               MemEltTyInt32,   "aarch64_sve_ldff1_gather_index">;
+def SVLDFF1UW_GATHER_64B_INDICES_U : MInst<"svldff1uw_gather_[{3}]index_{d}", "dPYu", "lUl",  [IsGatherLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldff1_gather_index">;
+
+def SVLDFF1_GATHER_32B_INDICES_U   : MInst<"svldff1_gather_[{3}]index[_{d}]", "dPcu", "iUif", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ldff1_gather_uxtw_index">;
+def SVLDFF1SH_GATHER_32B_INDICES_U : MInst<"svldff1sh_gather_[{3}]index_{d}", "dPTu", "iUi",  [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ldff1_gather_uxtw_index">;
+def SVLDFF1UH_GATHER_32B_INDICES_U : MInst<"svldff1uh_gather_[{3}]index_{d}", "dPXu", "iUi",  [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1_gather_uxtw_index">;
+
+// First-faulting load one vector (vector base, signed scalar index)
+def SVLDFF1_GATHER_INDEX_S   : MInst<"svldff1_gather[_{2}base]_index_{d}",   "dPul", "ilUiUlfd", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1SH_GATHER_INDEX_S : MInst<"svldff1sh_gather[_{2}base]_index_{d}", "dPul", "ilUiUl",   [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1UH_GATHER_INDEX_S : MInst<"svldff1uh_gather[_{2}base]_index_{d}", "dPul", "ilUiUl",   [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1SW_GATHER_INDEX_S : MInst<"svldff1sw_gather[_{2}base]_index_{d}", "dPul", "lUl",      [IsGatherLoad],               MemEltTyInt32,   "aarch64_sve_ldff1_gather_scalar_offset">;
+def SVLDFF1UW_GATHER_INDEX_S : MInst<"svldff1uw_gather[_{2}base]_index_{d}", "dPul", "lUl",      [IsGatherLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldff1_gather_scalar_offset">;
+
 // Non-faulting load one vector (scalar base)
 def SVLDNF1   : MInst<"svldnf1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad],               MemEltTyDefault, "aarch64_sve_ldnf1">;
 def SVLDNF1SB : MInst<"svldnf1sb_{d}", "dPS", "silUsUiUl",       [IsLoad],               MemEltTyInt8,    "aarch64_sve_ldnf1">;
@@ -376,3 +535,125 @@ def SVQDECH_U : SInst<"svqdech_pat[_{d}]",   "ddIi", "Us", MergeNone, "aarch64_s
 // Integer arithmetic
 def SVDOT_LANE_S : SInst<"svdot_lane[_{d}]",  "ddqqi",  "il",   MergeNone, "aarch64_sve_sdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
 def SVDOT_LANE_U : SInst<"svdot_lane[_{d}]",  "ddqqi",  "UiUl", MergeNone, "aarch64_sve_udot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// SVE2 - Non-temporal gather/scatter
+let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in {
+// Non-temporal gather load one vector (vector base)
+def SVLDNT1_GATHER_BASES_U   : MInst<"svldnt1_gather[_{2}base]_{0}",   "dPu", "ilUiUlfd", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1SB_GATHER_BASES_U : MInst<"svldnt1sb_gather[_{2}base]_{0}", "dPu", "ilUiUl",   [IsGatherLoad],               MemEltTyInt8,    "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1UB_GATHER_BASES_U : MInst<"svldnt1ub_gather[_{2}base]_{0}", "dPu", "ilUiUl",   [IsGatherLoad, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1SH_GATHER_BASES_U : MInst<"svldnt1sh_gather[_{2}base]_{0}", "dPu", "ilUiUl",   [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1UH_GATHER_BASES_U : MInst<"svldnt1uh_gather[_{2}base]_{0}", "dPu", "ilUiUl",   [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1SW_GATHER_BASES_U : MInst<"svldnt1sw_gather[_{2}base]_{0}", "dPu", "lUl",      [IsGatherLoad],               MemEltTyInt32,   "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1UW_GATHER_BASES_U : MInst<"svldnt1uw_gather[_{2}base]_{0}", "dPu", "lUl",      [IsGatherLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldnt1_gather_scalar_offset">;
+
+// Non-temporal gather load one vector (scalar base, signed vector offset in bytes)
+def SVLDNT1_GATHER_64B_OFFSETS_S   : MInst<"svldnt1_gather_[{3}]offset[_{0}]", "dPcx", "lUld", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ldnt1_gather">;
+def SVLDNT1SB_GATHER_64B_OFFSETS_S : MInst<"svldnt1sb_gather_[{3}]offset_{0}", "dPSx", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ldnt1_gather">;
+def SVLDNT1UB_GATHER_64B_OFFSETS_S : MInst<"svldnt1ub_gather_[{3}]offset_{0}", "dPWx", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldnt1_gather">;
+def SVLDNT1SH_GATHER_64B_OFFSETS_S : MInst<"svldnt1sh_gather_[{3}]offset_{0}", "dPTx", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ldnt1_gather">;
+def SVLDNT1UH_GATHER_64B_OFFSETS_S : MInst<"svldnt1uh_gather_[{3}]offset_{0}", "dPXx", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldnt1_gather">;
+def SVLDNT1SW_GATHER_64B_OFFSETS_S : MInst<"svldnt1sw_gather_[{3}]offset_{0}", "dPUx", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt32,   "aarch64_sve_ldnt1_gather">;
+def SVLDNT1UW_GATHER_64B_OFFSETS_S : MInst<"svldnt1uw_gather_[{3}]offset_{0}", "dPYx", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldnt1_gather">;
+
+// Non-temporal gather load one vector (scalar base, unsigned vector offset in bytes)
+def SVLDNT1_GATHER_64B_OFFSETS_U   : MInst<"svldnt1_gather_[{3}]offset[_{0}]", "dPcu", "lUld", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ldnt1_gather">;
+def SVLDNT1SB_GATHER_64B_OFFSETS_U : MInst<"svldnt1sb_gather_[{3}]offset_{0}", "dPSu", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ldnt1_gather">;
+def SVLDNT1UB_GATHER_64B_OFFSETS_U : MInst<"svldnt1ub_gather_[{3}]offset_{0}", "dPWu", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldnt1_gather">;
+def SVLDNT1SH_GATHER_64B_OFFSETS_U : MInst<"svldnt1sh_gather_[{3}]offset_{0}", "dPTu", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ldnt1_gather">;
+def SVLDNT1UH_GATHER_64B_OFFSETS_U : MInst<"svldnt1uh_gather_[{3}]offset_{0}", "dPXu", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldnt1_gather">;
+def SVLDNT1SW_GATHER_64B_OFFSETS_U : MInst<"svldnt1sw_gather_[{3}]offset_{0}", "dPUu", "lUl",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt32,   "aarch64_sve_ldnt1_gather">;
+def SVLDNT1UW_GATHER_64B_OFFSETS_U : MInst<"svldnt1uw_gather_[{3}]offset_{0}", "dPYu", "lUl",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldnt1_gather">;
+
+def SVLDNT1_GATHER_32B_OFFSETS_U   : MInst<"svldnt1_gather_[{3}]offset[_{0}]", "dPcu", "iUif", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ldnt1_gather_uxtw">;
+def SVLDNT1SB_GATHER_32B_OFFSETS_U : MInst<"svldnt1sb_gather_[{3}]offset_{0}", "dPSu", "iUi",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ldnt1_gather_uxtw">;
+def SVLDNT1UB_GATHER_32B_OFFSETS_U : MInst<"svldnt1ub_gather_[{3}]offset_{0}", "dPWu", "iUi",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldnt1_gather_uxtw">;
+def SVLDNT1SH_GATHER_32B_OFFSETS_U : MInst<"svldnt1sh_gather_[{3}]offset_{0}", "dPTu", "iUi",  [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ldnt1_gather_uxtw">;
+def SVLDNT1UH_GATHER_32B_OFFSETS_U : MInst<"svldnt1uh_gather_[{3}]offset_{0}", "dPXu", "iUi",  [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldnt1_gather_uxtw">;
+
+// Non-temporal gather load one vector (vector base, scalar offset in bytes)
+def SVLDNT1_GATHER_OFFSET_S   : MInst<"svldnt1_gather[_{2}base]_offset_{0}",   "dPul", "ilUiUlfd", [IsGatherLoad, IsByteIndexed],               MemEltTyDefault, "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1SB_GATHER_OFFSET_S : MInst<"svldnt1sb_gather[_{2}base]_offset_{0}", "dPul", "ilUiUl",   [IsGatherLoad, IsByteIndexed],               MemEltTyInt8,    "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1UB_GATHER_OFFSET_S : MInst<"svldnt1ub_gather[_{2}base]_offset_{0}", "dPul", "ilUiUl",   [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt8,    "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1SH_GATHER_OFFSET_S : MInst<"svldnt1sh_gather[_{2}base]_offset_{0}", "dPul", "ilUiUl",   [IsGatherLoad, IsByteIndexed],               MemEltTyInt16,   "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1UH_GATHER_OFFSET_S : MInst<"svldnt1uh_gather[_{2}base]_offset_{0}", "dPul", "ilUiUl",   [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1SW_GATHER_OFFSET_S : MInst<"svldnt1sw_gather[_{2}base]_offset_{0}", "dPul", "lUl",      [IsGatherLoad, IsByteIndexed],               MemEltTyInt32,   "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1UW_GATHER_OFFSET_S : MInst<"svldnt1uw_gather[_{2}base]_offset_{0}", "dPul", "lUl",      [IsGatherLoad, IsByteIndexed, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldnt1_gather_scalar_offset">;
+
+// Non-temporal gather load one vector (scalar base, signed vector index)
+def SVLDNT1_GATHER_64B_INDICES_S   : MInst<"svldnt1_gather_[{3}]index[_{0}]", "dPcx", "lUld", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ldnt1_gather_index">;
+def SVLDNT1SH_GATHER_64B_INDICES_S : MInst<"svldnt1sh_gather_[{3}]index_{0}", "dPTx", "lUl",  [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ldnt1_gather_index">;
+def SVLDNT1UH_GATHER_64B_INDICES_S : MInst<"svldnt1uh_gather_[{3}]index_{0}", "dPXx", "lUl",  [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldnt1_gather_index">;
+def SVLDNT1SW_GATHER_64B_INDICES_S : MInst<"svldnt1sw_gather_[{3}]index_{0}", "dPUx", "lUl",  [IsGatherLoad],               MemEltTyInt32,   "aarch64_sve_ldnt1_gather_index">;
+def SVLDNT1UW_GATHER_64B_INDICES_S : MInst<"svldnt1uw_gather_[{3}]index_{0}", "dPYx", "lUl",  [IsGatherLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldnt1_gather_index">;
+
+// Non-temporal gather load one vector (scalar base, unsigned vector index)
+def SVLDNT1_GATHER_64B_INDICES_U   : MInst<"svldnt1_gather_[{3}]index[_{0}]", "dPcu", "lUld", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ldnt1_gather_index">;
+def SVLDNT1SH_GATHER_64B_INDICES_U : MInst<"svldnt1sh_gather_[{3}]index_{0}", "dPTu", "lUl",  [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ldnt1_gather_index">;
+def SVLDNT1UH_GATHER_64B_INDICES_U : MInst<"svldnt1uh_gather_[{3}]index_{0}", "dPXu", "lUl",  [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldnt1_gather_index">;
+def SVLDNT1SW_GATHER_64B_INDICES_U : MInst<"svldnt1sw_gather_[{3}]index_{0}", "dPUu", "lUl",  [IsGatherLoad],               MemEltTyInt32,   "aarch64_sve_ldnt1_gather_index">;
+def SVLDNT1UW_GATHER_64B_INDICES_U : MInst<"svldnt1uw_gather_[{3}]index_{0}", "dPYu", "lUl",  [IsGatherLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldnt1_gather_index">;
+
+// Non-temporal gather load one vector (vector base, signed scalar index)
+def SVLDNT1_GATHER_INDEX_S   : MInst<"svldnt1_gather[_{2}base]_index_{0}",   "dPul", "ilUiUlfd", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1SH_GATHER_INDEX_S : MInst<"svldnt1sh_gather[_{2}base]_index_{0}", "dPul", "ilUiUl",   [IsGatherLoad],               MemEltTyInt16,   "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1UH_GATHER_INDEX_S : MInst<"svldnt1uh_gather[_{2}base]_index_{0}", "dPul", "ilUiUl",   [IsGatherLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1SW_GATHER_INDEX_S : MInst<"svldnt1sw_gather[_{2}base]_index_{0}", "dPul", "lUl",      [IsGatherLoad],               MemEltTyInt32,   "aarch64_sve_ldnt1_gather_scalar_offset">;
+def SVLDNT1UW_GATHER_INDEX_S : MInst<"svldnt1uw_gather[_{2}base]_index_{0}", "dPul", "lUl",      [IsGatherLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldnt1_gather_scalar_offset">;
+
+// Non-temporal scatter store one vector (vector base)
+def SVSTNT1_SCATTER_BASES_U  : MInst<"svstnt1_scatter[_{2}base_{d}]",  "vPud", "ilUiUlfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_stnt1_scatter_scalar_offset">;
+def SVSTNT1B_SCATTER_BASES_U : MInst<"svstnt1b_scatter[_{2}base_{d}]", "vPud", "ilUiUl",   [IsScatterStore], MemEltTyInt8,    "aarch64_sve_stnt1_scatter_scalar_offset">;
+def SVSTNT1H_SCATTER_BASES_U : MInst<"svstnt1h_scatter[_{2}base_{d}]", "vPud", "ilUiUl",   [IsScatterStore], MemEltTyInt16,   "aarch64_sve_stnt1_scatter_scalar_offset">;
+def SVSTNT1W_SCATTER_BASES_U : MInst<"svstnt1w_scatter[_{2}base_{d}]", "vPud", "lUl",      [IsScatterStore], MemEltTyInt32,   "aarch64_sve_stnt1_scatter_scalar_offset">;
+
+// Non-temporal scatter store one vector (scalar base, signed vector offset in bytes)
+def SVSTNT1_SCATTER_64B_OFFSETS_S   : MInst<"svstnt1_scatter_[{3}]offset[_{d}]",  "vPpxd", "lUld", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_stnt1_scatter">;
+def SVSTNT1B_SCATTER_64B_OFFSETS_SS : MInst<"svstnt1b_scatter_[{3}]offset[_{d}]", "vPAxd", "l",    [IsScatterStore, IsByteIndexed], MemEltTyInt8,    "aarch64_sve_stnt1_scatter">;
+def SVSTNT1B_SCATTER_64B_OFFSETS_SU : MInst<"svstnt1b_scatter_[{3}]offset[_{d}]", "vPExd", "Ul",   [IsScatterStore, IsByteIndexed], MemEltTyInt8,    "aarch64_sve_stnt1_scatter">;
+def SVSTNT1H_SCATTER_64B_OFFSETS_SS : MInst<"svstnt1h_scatter_[{3}]offset[_{d}]", "vPBxd", "l",    [IsScatterStore, IsByteIndexed], MemEltTyInt16,   "aarch64_sve_stnt1_scatter">;
+def SVSTNT1H_SCATTER_64B_OFFSETS_SU : MInst<"svstnt1h_scatter_[{3}]offset[_{d}]", "vPFxd", "Ul",   [IsScatterStore, IsByteIndexed], MemEltTyInt16,   "aarch64_sve_stnt1_scatter">;
+def SVSTNT1W_SCATTER_64B_OFFSETS_SS : MInst<"svstnt1w_scatter_[{3}]offset[_{d}]", "vPCxd", "l",    [IsScatterStore, IsByteIndexed], MemEltTyInt32,   "aarch64_sve_stnt1_scatter">;
+def SVSTNT1W_SCATTER_64B_OFFSETS_SU : MInst<"svstnt1w_scatter_[{3}]offset[_{d}]", "vPGxd", "Ul",   [IsScatterStore, IsByteIndexed], MemEltTyInt32,   "aarch64_sve_stnt1_scatter">;
+
+// Non-temporal scatter store one vector (scalar base, unsigned vector offset in bytes)
+def SVSTNT1_SCATTER_64B_OFFSETS_U   : MInst<"svstnt1_scatter_[{3}]offset[_{d}]",  "vPpud", "lUld", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_stnt1_scatter">;
+def SVSTNT1B_SCATTER_64B_OFFSETS_US : MInst<"svstnt1b_scatter_[{3}]offset[_{d}]", "vPAud", "l",    [IsScatterStore, IsByteIndexed], MemEltTyInt8,    "aarch64_sve_stnt1_scatter">;
+def SVSTNT1B_SCATTER_64B_OFFSETS_UU : MInst<"svstnt1b_scatter_[{3}]offset[_{d}]", "vPEud", "Ul",   [IsScatterStore, IsByteIndexed], MemEltTyInt8,    "aarch64_sve_stnt1_scatter">;
+def SVSTNT1H_SCATTER_64B_OFFSETS_US : MInst<"svstnt1h_scatter_[{3}]offset[_{d}]", "vPBud", "l",    [IsScatterStore, IsByteIndexed], MemEltTyInt16,   "aarch64_sve_stnt1_scatter">;
+def SVSTNT1H_SCATTER_64B_OFFSETS_UU : MInst<"svstnt1h_scatter_[{3}]offset[_{d}]", "vPFud", "Ul",   [IsScatterStore, IsByteIndexed], MemEltTyInt16,   "aarch64_sve_stnt1_scatter">;
+def SVSTNT1W_SCATTER_64B_OFFSETS_US : MInst<"svstnt1w_scatter_[{3}]offset[_{d}]", "vPCud", "l",    [IsScatterStore, IsByteIndexed], MemEltTyInt32,   "aarch64_sve_stnt1_scatter">;
+def SVSTNT1W_SCATTER_64B_OFFSETS_UU : MInst<"svstnt1w_scatter_[{3}]offset[_{d}]", "vPGud", "Ul",   [IsScatterStore, IsByteIndexed], MemEltTyInt32,   "aarch64_sve_stnt1_scatter">;
+
+def SVSTNT1_SCATTER_32B_OFFSETS_U   : MInst<"svstnt1_scatter_[{3}]offset[_{d}]",  "vPpud", "iUif", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_stnt1_scatter_uxtw">;
+def SVSTNT1B_SCATTER_32B_OFFSETS_US : MInst<"svstnt1b_scatter_[{3}]offset[_{d}]", "vPAud", "i",    [IsScatterStore, IsByteIndexed], MemEltTyInt8,    "aarch64_sve_stnt1_scatter_uxtw">;
+def SVSTNT1B_SCATTER_32B_OFFSETS_UU : MInst<"svstnt1b_scatter_[{3}]offset[_{d}]", "vPEud", "Ui",   [IsScatterStore, IsByteIndexed], MemEltTyInt8,    "aarch64_sve_stnt1_scatter_uxtw">;
+def SVSTNT1H_SCATTER_32B_OFFSETS_US : MInst<"svstnt1h_scatter_[{3}]offset[_{d}]", "vPBud", "i",    [IsScatterStore, IsByteIndexed], MemEltTyInt16,   "aarch64_sve_stnt1_scatter_uxtw">;
+def SVSTNT1H_SCATTER_32B_OFFSETS_UU : MInst<"svstnt1h_scatter_[{3}]offset[_{d}]", "vPFud", "Ui",   [IsScatterStore, IsByteIndexed], MemEltTyInt16,   "aarch64_sve_stnt1_scatter_uxtw">;
+
+// Non-temporal scatter store one vector (vector base, scalar offset in bytes)
+def SVSTNT1_SCATTER_OFFSET_S  : MInst<"svstnt1_scatter[_{2}base]_offset[_{d}]",  "vPuld", "ilUiUlfd", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_stnt1_scatter_scalar_offset">;
+def SVSTNT1B_SCATTER_OFFSET_S : MInst<"svstnt1b_scatter[_{2}base]_offset[_{d}]", "vPuld", "ilUiUl",   [IsScatterStore, IsByteIndexed], MemEltTyInt8,    "aarch64_sve_stnt1_scatter_scalar_offset">;
+def SVSTNT1H_SCATTER_OFFSET_S : MInst<"svstnt1h_scatter[_{2}base]_offset[_{d}]", "vPuld", "ilUiUl",   [IsScatterStore, IsByteIndexed], MemEltTyInt16,   "aarch64_sve_stnt1_scatter_scalar_offset">;
+def SVSTNT1W_SCATTER_OFFSET_S : MInst<"svstnt1w_scatter[_{2}base]_offset[_{d}]", "vPuld", "lUl",      [IsScatterStore, IsByteIndexed], MemEltTyInt32,   "aarch64_sve_stnt1_scatter_scalar_offset">;
+
+// Non-temporal scatter store one vector (scalar base, signed vector index)
+def SVSTNT1_SCATTER_INDICES_S   : MInst<"svstnt1_scatter_[{3}]index[_{d}]",  "vPpxd", "lUld", [IsScatterStore], MemEltTyDefault, "aarch64_sve_stnt1_scatter_index">;
+def SVSTNT1H_SCATTER_INDICES_SS : MInst<"svstnt1h_scatter_[{3}]index[_{d}]", "vPBxd", "l",    [IsScatterStore], MemEltTyInt16,   "aarch64_sve_stnt1_scatter_index">;
+def SVSTNT1H_SCATTER_INDICES_SU : MInst<"svstnt1h_scatter_[{3}]index[_{d}]", "vPFxd", "Ul",   [IsScatterStore], MemEltTyInt16,   "aarch64_sve_stnt1_scatter_index">;
+def SVSTNT1W_SCATTER_INDICES_SS : MInst<"svstnt1w_scatter_[{3}]index[_{d}]", "vPCxd", "l",    [IsScatterStore], MemEltTyInt32,   "aarch64_sve_stnt1_scatter_index">;
+def SVSTNT1W_SCATTER_INDICES_SU : MInst<"svstnt1w_scatter_[{3}]index[_{d}]", "vPGxd", "Ul",   [IsScatterStore], MemEltTyInt32,   "aarch64_sve_stnt1_scatter_index">;
+
+// Non-temporal scatter store one vector (scalar base, unsigned vector index)
+def SVSTNT1_SCATTER_INDICES_U   : MInst<"svstnt1_scatter_[{3}]index[_{d}]",  "vPpud", "lUld", [IsScatterStore], MemEltTyDefault, "aarch64_sve_stnt1_scatter_index">;
+def SVSTNT1H_SCATTER_INDICES_US : MInst<"svstnt1h_scatter_[{3}]index[_{d}]", "vPBud", "l",    [IsScatterStore], MemEltTyInt16,   "aarch64_sve_stnt1_scatter_index">;
+def SVSTNT1H_SCATTER_INDICES_UU : MInst<"svstnt1h_scatter_[{3}]index[_{d}]", "vPFud", "Ul",   [IsScatterStore], MemEltTyInt16,   "aarch64_sve_stnt1_scatter_index">;
+def SVSTNT1W_SCATTER_INDICES_US : MInst<"svstnt1w_scatter_[{3}]index[_{d}]", "vPCud", "l",    [IsScatterStore], MemEltTyInt32,   "aarch64_sve_stnt1_scatter_index">;
+def SVSTNT1W_SCATTER_INDICES_UU : MInst<"svstnt1w_scatter_[{3}]index[_{d}]", "vPGud", "Ul",   [IsScatterStore], MemEltTyInt32,   "aarch64_sve_stnt1_scatter_index">;
+
+// Non-temporal scatter store one vector (vector base, signed scalar index)
+def SVSTNT1_SCATTER_INDEX_S  : MInst<"svstnt1_scatter[_{2}base]_index[_{d}]",  "vPuld", "ilUiUlfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_stnt1_scatter_scalar_offset">;
+def SVSTNT1H_SCATTER_INDEX_S : MInst<"svstnt1h_scatter[_{2}base]_index[_{d}]", "vPuld", "ilUiUl",   [IsScatterStore], MemEltTyInt16,   "aarch64_sve_stnt1_scatter_scalar_offset">;
+def SVSTNT1W_SCATTER_INDEX_S : MInst<"svstnt1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl",      [IsScatterStore], MemEltTyInt32,   "aarch64_sve_stnt1_scatter_scalar_offset">;
+}

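For readers less familiar with the TableGen encoding above: the prototype
string, together with the type list, determines the C declarations that the
emitter produces. As a hand-expanded sketch (using the modifier legend from
this file, where 'd' is the default vector, 'P' a predicate, 'c' a const
pointer and 'x' a vector of signed integers), SVLD1_GATHER_64B_OFFSETS_S
with prototype "dPcx" and types "lUld" yields declarations along the lines
of:

  /* 'l' (int64_t) instantiation; "[{3}]" expands from the offset type: */
  svint64_t svld1_gather_s64offset_s64(svbool_t pg, const int64_t *base,
                                       svint64_t offsets);
  /* 'd' (float64_t) instantiation: */
  svfloat64_t svld1_gather_s64offset_f64(svbool_t pg, const float64_t *base,
                                         svint64_t offsets);
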
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b2fb2638c1a5..57d57af74d18 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7463,8 +7463,56 @@ Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
   return Op;
 }
 
+/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
+/// access builtin.  Only required if it can't be inferred from the base pointer
+/// operand.
+llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(SVETypeFlags TypeFlags) {
+  switch (TypeFlags.getMemEltType()) {
+  case SVETypeFlags::MemEltTyDefault:
+    return getEltType(TypeFlags);
+  case SVETypeFlags::MemEltTyInt8:
+    return Builder.getInt8Ty();
+  case SVETypeFlags::MemEltTyInt16:
+    return Builder.getInt16Ty();
+  case SVETypeFlags::MemEltTyInt32:
+    return Builder.getInt32Ty();
+  case SVETypeFlags::MemEltTyInt64:
+    return Builder.getInt64Ty();
+  }
+  llvm_unreachable("Unknown MemEltType");
+}
+
+llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) {
+  switch (TypeFlags.getEltType()) {
+  default:
+    llvm_unreachable("Invalid SVETypeFlag!");
+
+  case SVETypeFlags::EltTyInt8:
+    return Builder.getInt8Ty();
+  case SVETypeFlags::EltTyInt16:
+    return Builder.getInt16Ty();
+  case SVETypeFlags::EltTyInt32:
+    return Builder.getInt32Ty();
+  case SVETypeFlags::EltTyInt64:
+    return Builder.getInt64Ty();
+
+  case SVETypeFlags::EltTyFloat16:
+    return Builder.getHalfTy();
+  case SVETypeFlags::EltTyFloat32:
+    return Builder.getFloatTy();
+  case SVETypeFlags::EltTyFloat64:
+    return Builder.getDoubleTy();
+
+  case SVETypeFlags::EltTyBool8:
+  case SVETypeFlags::EltTyBool16:
+  case SVETypeFlags::EltTyBool32:
+  case SVETypeFlags::EltTyBool64:
+    return Builder.getInt1Ty();
+  }
+}
+
 // Return the llvm vector type corresponding to the specified element TypeFlags.
-llvm::Type *CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
+llvm::VectorType *CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
   switch (TypeFlags.getEltType()) {
   default:
     llvm_unreachable("Invalid SVETypeFlag!");
@@ -7528,6 +7576,113 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
   return C;
 }
 
+Value *CodeGenFunction::EmitSVEGatherLoad(SVETypeFlags TypeFlags,
+                                          SmallVectorImpl<Value *> &Ops,
+                                          unsigned IntID) {
+  auto *ResultTy = getSVEType(TypeFlags);
+  auto *OverloadedTy = llvm::VectorType::get(SVEBuiltinMemEltTy(TypeFlags),
+                                             ResultTy->getElementCount());
+
+  // At the ACLE level there's only one predicate type, svbool_t, which is
+  // mapped to <n x 16 x i1>. However, this might be incompatible with the
+  // actual type being loaded. For example, when loading doubles (f64) the
+  // predicate should be <n x 2 x i1> instead. At the IR level the type of
+  // the predicate and the data being loaded must match. Cast accordingly.
+  Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
+
+  Function *F = nullptr;
+  if (Ops[1]->getType()->isVectorTy())
+    // This is the "vector base, scalar offset" case. In order to uniquely
+    // map this built-in to an LLVM IR intrinsic, we need both the return type
+    // and the type of the vector base.
+    F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
+  else
+    // This is the "scalar base, vector offset case". The type of the offset
+    // is encoded in the name of the intrinsic. We only need to specify the
+    // return type in order to uniquely map this built-in to an LLVM IR
+    // intrinsic.
+    F = CGM.getIntrinsic(IntID, OverloadedTy);
+
+  // Pass 0 when the offset is missing. This can only be applied when using
+  // the "vector base" addressing mode for which ACLE allows no offset. The
+  // corresponding LLVM IR always requires an offset.
+  if (Ops.size() == 2) {
+    assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
+    Ops.push_back(ConstantInt::get(Int64Ty, 0));
+  }
+
+  // For "vector base, scalar index" scale the index so that it becomes a
+  // scalar offset.
+  if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
+    unsigned BytesPerElt =
+        OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
+    Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
+    Ops[2] = Builder.CreateMul(Ops[2], Scale);
+  }
+
+  Value *Call = Builder.CreateCall(F, Ops);
+
+  // The following sext/zext is only needed when ResultTy != OverloadedTy. In
+  // other cases it's folded into a nop.
+  return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
+                                  : Builder.CreateSExt(Call, ResultTy);
+}
+
+Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags,
+                                            SmallVectorImpl<Value *> &Ops,
+                                            unsigned IntID) {
+  auto *SrcDataTy = getSVEType(TypeFlags);
+  auto *OverloadedTy = llvm::VectorType::get(SVEBuiltinMemEltTy(TypeFlags),
+                                             SrcDataTy->getElementCount());
+
+  // In ACLE the source data is passed in the last argument, whereas in LLVM IR
+  // it's the first argument. Move it accordingly.
+  Ops.insert(Ops.begin(), Ops.pop_back_val());
+
+  Function *F = nullptr;
+  if (Ops[2]->getType()->isVectorTy())
+    // This is the "vector base, scalar offset" case. In order to uniquely
+    // map this built-in to an LLVM IR intrinsic, we need both the return type
+    // and the type of the vector base.
+    F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
+  else
+    // This is the "scalar base, vector offset case". The type of the offset
+    // is encoded in the name of the intrinsic. We only need to specify the
+    // return type in order to uniquely map this built-in to an LLVM IR
+    // intrinsic.
+    F = CGM.getIntrinsic(IntID, OverloadedTy);
+
+  // Pass 0 when the offset is missing. This can only be applied when using
+  // the "vector base" addressing mode for which ACLE allows no offset. The
+  // corresponding LLVM IR always requires an offset.
+  if (Ops.size() == 3) {
+    assert(Ops[2]->getType()->isVectorTy() && "Scalar base requires an offset");
+    Ops.push_back(ConstantInt::get(Int64Ty, 0));
+  }
+
+  // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
+  // folded into a nop.
+  Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
+
+  // At the ACLE level there's only one predicate type, svbool_t, which is
+  // mapped to <n x 16 x i1>. However, this might be incompatible with the
+  // actual type being stored. For example, when storing doubles (f64) the
+  // predicate should be <n x 2 x i1> instead. At the IR level the type of
+  // the predicate and the data being stored must match. Cast accordingly.
+  Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
+
+  // For "vector base, scalar index" scale the index so that it becomes a
+  // scalar offset.
+  if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
+    unsigned BytesPerElt =
+        OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
+    Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
+    Ops[3] = Builder.CreateMul(Ops[3], Scale);
+  }
+
+  return Builder.CreateCall(F, Ops);
+}
+
 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
                                           llvm::Type *ReturnTy,
                                           SmallVectorImpl<Value *> &Ops,
@@ -7639,8 +7794,12 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
                              TypeFlags.isZExtReturn());
   else if (TypeFlags.isStore())
     return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isGatherLoad())
+    return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isScatterStore())
+    return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (Builtin->LLVMIntrinsic != 0) {
-    llvm::Type* OverloadedTy = getSVEType(TypeFlags);
+    llvm::VectorType *OverloadedTy = getSVEType(TypeFlags);
 
     if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
       InsertExplicitZeroOperand(Builder, Ty, Ops);
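
The index-scaling step above is visible in the generated IR: for the
"vector base, scalar index" forms, the scalar index is multiplied by the
element size in bytes and passed to the scalar-offset intrinsic. A
hand-written sketch in the style of the tests below (illustrative, not
verbatim FileCheck output):

  svint64_t index_scaling_example(svbool_t pg, svuint64_t bases, int64_t index) {
    // Expected lowering, roughly:
    //   %[[PG]]  = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
    //   %[[OFF]] = mul i64 %index, 8   ; scale the index by 8 bytes per element
    //   call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(%[[PG]], %bases, i64 %[[OFF]])
    return svld1_gather_u64base_index_s64(pg, bases, index);
  }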

diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 2429f5d4a445..786117a1429e 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3903,9 +3903,21 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt,
                                  llvm::Type *Ty, bool usgn, const char *name);
   llvm::Value *vectorWrapScalar16(llvm::Value *Op);
+  /// SVEBuiltinMemEltTy - Returns the memory element type for this memory
+  /// access builtin.  Only required if it can't be inferred from the base
+  /// pointer operand.
+  llvm::Type *SVEBuiltinMemEltTy(SVETypeFlags TypeFlags);
 
-  llvm::Type *getSVEType(const SVETypeFlags &TypeFlags);
+  llvm::Type *getEltType(SVETypeFlags TypeFlags);
+
+  llvm::VectorType *getSVEType(const SVETypeFlags &TypeFlags);
   llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, llvm::VectorType *VTy);
+  llvm::Value *EmitSVEGatherLoad(SVETypeFlags TypeFlags,
+                                 llvm::SmallVectorImpl<llvm::Value *> &Ops,
+                                 unsigned IntID);
+  llvm::Value *EmitSVEScatterStore(SVETypeFlags TypeFlags,
+                                   llvm::SmallVectorImpl<llvm::Value *> &Ops,
+                                   unsigned IntID);
   llvm::Value *EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy,
                                  SmallVectorImpl<llvm::Value *> &Ops,
                                  unsigned BuiltinID, bool IsZExtReturn);

diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
index 5ff56627048b..e38fd569fcf4 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
@@ -117,6 +117,7 @@ svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base)
   // CHECK: ret <vscale x 2 x double> %[[LOAD]]
   return SVE_ACLE_FUNC(svld1,_f64,,)(pg, base);
 }
+
 svint8_t test_svld1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
 {
   // CHECK-LABEL: test_svld1_vnum_s8
@@ -235,3 +236,345 @@ svfloat64_t test_svld1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum
   // CHECK: ret <vscale x 2 x double> %[[LOAD]]
   return SVE_ACLE_FUNC(svld1_vnum,_f64,,)(pg, base, vnum);
 }
+
+svint32_t test_svld1_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svld1_gather_u32base_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svld1_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1_gather_u64base_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svld1_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svld1_gather_u32base_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svld1_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1_gather_u64base_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u64base, _u64, )(pg, bases);
+}
+
+svfloat32_t test_svld1_gather_u32base_f32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svld1_gather_u32base_f32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u32base, _f32, )(pg, bases);
+}
+
+svfloat64_t test_svld1_gather_u64base_f64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1_gather_u64base_f64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u64base, _f64, )(pg, bases);
+}
+
+svint32_t test_svld1_gather_s32offset_s32(svbool_t pg, const int32_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svld1_gather_s32offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, s32, offset, _s32)(pg, base, offsets);
+}
+
+svint64_t test_svld1_gather_s64offset_s64(svbool_t pg, const int64_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1_gather_s64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, s64, offset, _s64)(pg, base, offsets);
+}
+
+svuint32_t test_svld1_gather_s32offset_u32(svbool_t pg, const uint32_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svld1_gather_s32offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, s32, offset, _u32)(pg, base, offsets);
+}
+
+svuint64_t test_svld1_gather_s64offset_u64(svbool_t pg, const uint64_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1_gather_s64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, s64, offset, _u64)(pg, base, offsets);
+}
+
+svfloat32_t test_svld1_gather_s32offset_f32(svbool_t pg, const float32_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svld1_gather_s32offset_f32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, s32, offset, _f32)(pg, base, offsets);
+}
+
+svfloat64_t test_svld1_gather_s64offset_f64(svbool_t pg, const float64_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1_gather_s64offset_f64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, s64, offset, _f64)(pg, base, offsets);
+}
+
+svint32_t test_svld1_gather_u32offset_s32(svbool_t pg, const int32_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svld1_gather_u32offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, u32, offset, _s32)(pg, base, offsets);
+}
+
+svint64_t test_svld1_gather_u64offset_s64(svbool_t pg, const int64_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1_gather_u64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, u64, offset, _s64)(pg, base, offsets);
+}
+
+svuint32_t test_svld1_gather_u32offset_u32(svbool_t pg, const uint32_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svld1_gather_u32offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, u32, offset, _u32)(pg, base, offsets);
+}
+
+svuint64_t test_svld1_gather_u64offset_u64(svbool_t pg, const uint64_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1_gather_u64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, u64, offset, _u64)(pg, base, offsets);
+}
+
+svfloat32_t test_svld1_gather_u32offset_f32(svbool_t pg, const float32_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svld1_gather_u32offset_f32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, u32, offset, _f32)(pg, base, offsets);
+}
+
+svfloat64_t test_svld1_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1_gather_u64offset_f64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, u64, offset, _f64)(pg, base, offsets);
+}
+
+svint32_t test_svld1_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1_gather_u32base_offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svld1_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1_gather_u64base_offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svld1_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1_gather_u32base_offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svld1_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1_gather_u64base_offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svfloat32_t test_svld1_gather_u32base_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1_gather_u32base_offset_f32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u32base, _offset_f32, )(pg, bases, offset);
+}
+
+svfloat64_t test_svld1_gather_u64base_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1_gather_u64base_offset_f64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u64base, _offset_f64, )(pg, bases, offset);
+}
+
+svint32_t test_svld1_gather_s32index_s32(svbool_t pg, const int32_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svld1_gather_s32index_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base, <vscale x 4 x i32> %indices)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, s32, index, _s32)(pg, base, indices);
+}
+
+svint64_t test_svld1_gather_s64index_s64(svbool_t pg, const int64_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svld1_gather_s64index_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, s64, index, _s64)(pg, base, indices);
+}
+
+svuint32_t test_svld1_gather_s32index_u32(svbool_t pg, const uint32_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svld1_gather_s32index_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base, <vscale x 4 x i32> %indices)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, s32, index, _u32)(pg, base, indices);
+}
+
+svuint64_t test_svld1_gather_s64index_u64(svbool_t pg, const uint64_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svld1_gather_s64index_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, s64, index, _u64)(pg, base, indices);
+}
+
+svfloat32_t test_svld1_gather_s32index_f32(svbool_t pg, const float32_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svld1_gather_s32index_f32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %base, <vscale x 4 x i32> %indices)
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, s32, index, _f32)(pg, base, indices);
+}
+
+svfloat64_t test_svld1_gather_s64index_f64(svbool_t pg, const float64_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svld1_gather_s64index_f64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, s64, index, _f64)(pg, base, indices);
+}
+
+svint32_t test_svld1_gather_u32index_s32(svbool_t pg, const int32_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svld1_gather_u32index_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base, <vscale x 4 x i32> %indices)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, u32, index, _s32)(pg, base, indices);
+}
+
+svint64_t test_svld1_gather_u64index_s64(svbool_t pg, const int64_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svld1_gather_u64index_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, u64, index, _s64)(pg, base, indices);
+}
+
+svuint32_t test_svld1_gather_u32index_u32(svbool_t pg, const uint32_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svld1_gather_u32index_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i32* %base, <vscale x 4 x i32> %indices)
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, u32, index, _u32)(pg, base, indices);
+}
+
+svuint64_t test_svld1_gather_u64index_u64(svbool_t pg, const uint64_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svld1_gather_u64index_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, u64, index, _u64)(pg, base, indices);
+}
+
+svfloat32_t test_svld1_gather_u32index_f32(svbool_t pg, const float32_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svld1_gather_u32index_f32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> %[[PG]], float* %base, <vscale x 4 x i32> %indices)
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, u32, index, _f32)(pg, base, indices);
+}
+
+svfloat64_t test_svld1_gather_u64index_f64(svbool_t pg, const float64_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svld1_gather_u64index_f64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %[[PG]], double* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather_, u64, index, _f64)(pg, base, indices);
+}
+
+svint32_t test_svld1_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1_gather_u32base_index_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]])
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u32base, _index_s32, )(pg, bases, index);
+}
+
+svint64_t test_svld1_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1_gather_u64base_index_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]])
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint32_t test_svld1_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1_gather_u32base_index_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]])
+  // CHECK: ret <vscale x 4 x i32> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u32base, _index_u32, )(pg, bases, index);
+}
+
+svuint64_t test_svld1_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1_gather_u64base_index_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]])
+  // CHECK: ret <vscale x 2 x i64> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u64base, _index_u64, )(pg, bases, index);
+}
+
+svfloat32_t test_svld1_gather_u32base_index_f32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1_gather_u32base_index_f32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]])
+  // CHECK: ret <vscale x 4 x float> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u32base, _index_f32, )(pg, bases, index);
+}
+
+svfloat64_t test_svld1_gather_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1_gather_u64base_index_f64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]])
+  // CHECK: ret <vscale x 2 x double> %[[LOAD]]
+  return SVE_ACLE_FUNC(svld1_gather, _u64base, _index_f64, )(pg, bases, index);
+}
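+
The `_index_` variants with a vector base shown above are lowered by scaling the scalar index into a byte offset at codegen time (the `shl i64 %index, 2` / `shl i64 %index, 3` in the checks, i.e. a multiply by the element size). A minimal usage sketch, assuming an SVE-enabled toolchain with arm_sve.h; the intrinsic names are the ones exercised by the tests above:

  #include <arm_sve.h>

  // The index form and the offset form address the same elements: the
  // scalar index is scaled by sizeof(int32_t), matching 'shl i64 %index, 2'.
  svint32_t index_vs_offset(svbool_t pg, svuint32_t bases, int64_t i) {
    svint32_t a = svld1_gather_u32base_index_s32(pg, bases, i);
    svint32_t b = svld1_gather_u32base_offset_s32(pg, bases, i * 4);
    return svadd_s32_x(pg, a, b);
  }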

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
index e9c3a8c9dd6c..dc6637412e96 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
@@ -147,3 +147,147 @@ svuint64_t test_svld1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum)
   // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
   return svld1sb_vnum_u64(pg, base, vnum);
 }
+
+svint32_t test_svld1sb_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svld1sb_gather_u32base_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svld1sb_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1sb_gather_u64base_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svld1sb_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svld1sb_gather_u32base_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svld1sb_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1sb_gather_u64base_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint32_t test_svld1sb_gather_s32offset_s32(svbool_t pg, const int8_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svld1sb_gather_s32offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather_, s32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svld1sb_gather_s64offset_s64(svbool_t pg, const int8_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1sb_gather_s64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svld1sb_gather_s32offset_u32(svbool_t pg, const int8_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svld1sb_gather_s32offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather_, s32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svld1sb_gather_s64offset_u64(svbool_t pg, const int8_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1sb_gather_s64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svld1sb_gather_u32offset_s32(svbool_t pg, const int8_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svld1sb_gather_u32offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather_, u32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svld1sb_gather_u64offset_s64(svbool_t pg, const int8_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1sb_gather_u64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svld1sb_gather_u32offset_u32(svbool_t pg, const int8_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svld1sb_gather_u32offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather_, u32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svld1sb_gather_u64offset_u64(svbool_t pg, const int8_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1sb_gather_u64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svld1sb_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1sb_gather_u32base_offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svld1sb_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1sb_gather_u64base_offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svld1sb_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1sb_gather_u32base_offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svld1sb_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1sb_gather_u64base_offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sb_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
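+
The extending forms load narrow memory elements and widen them in registers: the checks above show an nxv4i8/nxv2i8 gather followed by a `sext` to the container type. A short sketch under the same assumptions (SVE-enabled toolchain, arm_sve.h):

  #include <arm_sve.h>

  // Gather signed bytes at base + offsets[i] and sign-extend each lane
  // to 32 bits (an i8 gather followed by sext, as in the IR above).
  svint32_t widen_signed_bytes(svbool_t pg, const int8_t *base,
                               svint32_t offsets) {
    return svld1sb_gather_s32offset_s32(pg, base, offsets);
  }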

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
index a62f86cd91ff..bc38a44c4c20 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
@@ -101,3 +101,259 @@ svuint64_t test_svld1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum)
   // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
   return svld1sh_vnum_u64(pg, base, vnum);
 }
+
+svint32_t test_svld1sh_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svld1sh_gather_u32base_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svld1sh_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1sh_gather_u64base_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svld1sh_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svld1sh_gather_u32base_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svld1sh_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1sh_gather_u64base_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint32_t test_svld1sh_gather_s32offset_s32(svbool_t pg, const int16_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svld1sh_gather_s32offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, s32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svld1sh_gather_s64offset_s64(svbool_t pg, const int16_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1sh_gather_s64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svld1sh_gather_s32offset_u32(svbool_t pg, const int16_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svld1sh_gather_s32offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, s32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svld1sh_gather_s64offset_u64(svbool_t pg, const int16_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1sh_gather_s64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svld1sh_gather_u32offset_s32(svbool_t pg, const int16_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svld1sh_gather_u32offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, u32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svld1sh_gather_u64offset_s64(svbool_t pg, const int16_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1sh_gather_u64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svld1sh_gather_u32offset_u32(svbool_t pg, const int16_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svld1sh_gather_u32offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, u32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svld1sh_gather_u64offset_u64(svbool_t pg, const int16_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1sh_gather_u64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svld1sh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1sh_gather_u32base_offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svld1sh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1sh_gather_u64base_offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svld1sh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1sh_gather_u32base_offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svld1sh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1sh_gather_u64base_offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svint32_t test_svld1sh_gather_s32index_s32(svbool_t pg, const int16_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svld1sh_gather_s32index_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, s32, index_s32, )(pg, base, indices);
+}
+
+svint64_t test_svld1sh_gather_s64index_s64(svbool_t pg, const int16_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svld1sh_gather_s64index_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, s64, index_s64, )(pg, base, indices);
+}
+
+svuint32_t test_svld1sh_gather_s32index_u32(svbool_t pg, const int16_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svld1sh_gather_s32index_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, s32, index_u32, )(pg, base, indices);
+}
+
+svuint64_t test_svld1sh_gather_s64index_u64(svbool_t pg, const int16_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svld1sh_gather_s64index_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, s64, index_u64, )(pg, base, indices);
+}
+
+svint32_t test_svld1sh_gather_u32index_s32(svbool_t pg, const int16_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svld1sh_gather_u32index_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, u32, index_s32, )(pg, base, indices);
+}
+
+svint64_t test_svld1sh_gather_u64index_s64(svbool_t pg, const int16_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svld1sh_gather_u64index_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, u64, index_s64, )(pg, base, indices);
+}
+
+svuint32_t test_svld1sh_gather_u32index_u32(svbool_t pg, const int16_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svld1sh_gather_u32index_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, u32, index_u32, )(pg, base, indices);
+}
+
+svuint64_t test_svld1sh_gather_u64index_u64(svbool_t pg, const int16_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svld1sh_gather_u64index_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather_, u64, index_u64, )(pg, base, indices);
+}
+
+svint32_t test_svld1sh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1sh_gather_u32base_index_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather, _u32base, _index_s32, )(pg, bases, index);
+}
+
+svint64_t test_svld1sh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1sh_gather_u64base_index_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint32_t test_svld1sh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1sh_gather_u32base_index_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather, _u32base, _index_u32, )(pg, bases, index);
+}
+
+svuint64_t test_svld1sh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1sh_gather_u64base_index_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sh_gather, _u64base, _index_u64, )(pg, bases, index);
+}
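+
Note that for the extending gathers the index scaling follows the memory element size rather than the container size: every `_index_` test above shifts by 1 (16-bit elements), whereas the full-width svld1 forms shift by 2 or 3. A hedged sketch under the same assumptions:

  #include <arm_sve.h>

  // The scalar index counts 16-bit memory elements, so it is scaled by 2
  // ('shl i64 %index, 1') before feeding the scalar-offset intrinsic.
  svint64_t gather_halves(svbool_t pg, svuint64_t bases, int64_t i) {
    return svld1sh_gather_u64base_index_s64(pg, bases, i);
  }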

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
index 79af6fd339aa..8bb18009f8da 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
@@ -55,3 +55,131 @@ svuint64_t test_svld1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum)
   // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
   return svld1sw_vnum_u64(pg, base, vnum);
 }
+
+svint64_t test_svld1sw_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1sw_gather_u64base_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint64_t test_svld1sw_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1sw_gather_u64base_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint64_t test_svld1sw_gather_s64offset_s64(svbool_t pg, const int32_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1sw_gather_s64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svld1sw_gather_s64offset_u64(svbool_t pg, const int32_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1sw_gather_s64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint64_t test_svld1sw_gather_u64offset_s64(svbool_t pg, const int32_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1sw_gather_u64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svld1sw_gather_u64offset_u64(svbool_t pg, const int32_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1sw_gather_u64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint64_t test_svld1sw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1sw_gather_u64base_offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint64_t test_svld1sw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1sw_gather_u64base_offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svint64_t test_svld1sw_gather_s64index_s64(svbool_t pg, const int32_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svld1sw_gather_s64index_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather_, s64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svld1sw_gather_s64index_u64(svbool_t pg, const int32_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svld1sw_gather_s64index_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather_, s64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svld1sw_gather_u64index_s64(svbool_t pg, const int32_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svld1sw_gather_u64index_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather_, u64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svld1sw_gather_u64index_u64(svbool_t pg, const int32_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svld1sw_gather_u64index_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather_, u64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svld1sw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1sw_gather_u64base_index_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint64_t test_svld1sw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1sw_gather_u64base_index_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]])
+  // CHECK: %[[SEXT:.*]] = sext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
+  return SVE_ACLE_FUNC(svld1sw_gather, _u64base, _index_u64, )(pg, bases, index);
+}
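
(Similarly, a small illustrative caller for the byte-offset form tested
above, under the same assumptions; `table` and `byte_offsets` are made-up
names.)

  #include <arm_sve.h>

  // Gathers an int32_t at each per-lane byte offset from `table` and
  // sign-extends every active lane to 64 bits.
  svint64_t gather_words(const int32_t *table, svuint64_t byte_offsets) {
    svbool_t pg = svptrue_b64();
    return svld1sw_gather_u64offset_s64(pg, table, byte_offsets);
  }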

diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
index fa430cc7bf50..31fda97f37c3 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
@@ -147,3 +147,147 @@ svuint64_t test_svld1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum)
   // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
   return svld1ub_vnum_u64(pg, base, vnum);
 }
+
+svint32_t test_svld1ub_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svld1ub_gather_u32base_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svld1ub_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1ub_gather_u64base_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svld1ub_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svld1ub_gather_u32base_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svld1ub_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1ub_gather_u64base_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint32_t test_svld1ub_gather_s32offset_s32(svbool_t pg, const uint8_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svld1ub_gather_s32offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather_, s32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svld1ub_gather_s64offset_s64(svbool_t pg, const uint8_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1ub_gather_s64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svld1ub_gather_s32offset_u32(svbool_t pg, const uint8_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svld1ub_gather_s32offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather_, s32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svld1ub_gather_s64offset_u64(svbool_t pg, const uint8_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1ub_gather_s64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svld1ub_gather_u32offset_s32(svbool_t pg, const uint8_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svld1ub_gather_u32offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather_, u32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svld1ub_gather_u64offset_s64(svbool_t pg, const uint8_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1ub_gather_u64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svld1ub_gather_u32offset_u32(svbool_t pg, const uint8_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svld1ub_gather_u32offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather_, u32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svld1ub_gather_u64offset_u64(svbool_t pg, const uint8_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1ub_gather_u64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svld1ub_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1ub_gather_u32base_offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svld1ub_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1ub_gather_u64base_offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svld1ub_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1ub_gather_u32base_offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i8> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svld1ub_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1ub_gather_u64base_offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i8> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1ub_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
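
(Illustrative only: the zero-extending byte gather below is the building
block for per-lane table lookups; the helper and its names are hypothetical.)

  #include <arm_sve.h>

  // Each active lane loads table[offsets[i]] as a u8 and zero-extends it
  // to 64 bits.
  svuint64_t lookup_bytes(const uint8_t *table, svuint64_t offsets) {
    svbool_t pg = svptrue_b64();
    return svld1ub_gather_u64offset_u64(pg, table, offsets);
  }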

diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
index 32de2decd8db..0cf1378c370d 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
@@ -101,3 +101,259 @@ svuint64_t test_svld1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum
   // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
   return svld1uh_vnum_u64(pg, base, vnum);
 }
+
+svint32_t test_svld1uh_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svld1uh_gather_u32base_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svld1uh_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1uh_gather_u64base_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svld1uh_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svld1uh_gather_u32base_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svld1uh_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1uh_gather_u64base_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint32_t test_svld1uh_gather_s32offset_s32(svbool_t pg, const uint16_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svld1uh_gather_s32offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, s32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svld1uh_gather_s64offset_s64(svbool_t pg, const uint16_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1uh_gather_s64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svld1uh_gather_s32offset_u32(svbool_t pg, const uint16_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svld1uh_gather_s32offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, s32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svld1uh_gather_s64offset_u64(svbool_t pg, const uint16_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1uh_gather_s64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svld1uh_gather_u32offset_s32(svbool_t pg, const uint16_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svld1uh_gather_u32offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, u32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svld1uh_gather_u64offset_s64(svbool_t pg, const uint16_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1uh_gather_u64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svld1uh_gather_u32offset_u32(svbool_t pg, const uint16_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svld1uh_gather_u32offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, u32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svld1uh_gather_u64offset_u64(svbool_t pg, const uint16_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1uh_gather_u64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svld1uh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1uh_gather_u32base_offset_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svld1uh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1uh_gather_u64base_offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svld1uh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1uh_gather_u32base_offset_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svld1uh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1uh_gather_u64base_offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svint32_t test_svld1uh_gather_s32index_s32(svbool_t pg, const uint16_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svld1uh_gather_s32index_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, s32, index_s32, )(pg, base, indices);
+}
+
+svint64_t test_svld1uh_gather_s64index_s64(svbool_t pg, const uint16_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svld1uh_gather_s64index_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, s64, index_s64, )(pg, base, indices);
+}
+
+svuint32_t test_svld1uh_gather_s32index_u32(svbool_t pg, const uint16_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svld1uh_gather_s32index_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, s32, index_u32, )(pg, base, indices);
+}
+
+svuint64_t test_svld1uh_gather_s64index_u64(svbool_t pg, const uint16_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svld1uh_gather_s64index_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, s64, index_u64, )(pg, base, indices);
+}
+
+svint32_t test_svld1uh_gather_u32index_s32(svbool_t pg, const uint16_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svld1uh_gather_u32index_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, u32, index_s32, )(pg, base, indices);
+}
+
+svint64_t test_svld1uh_gather_u64index_s64(svbool_t pg, const uint16_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svld1uh_gather_u64index_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, u64, index_s64, )(pg, base, indices);
+}
+
+svuint32_t test_svld1uh_gather_u32index_u32(svbool_t pg, const uint16_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svld1uh_gather_u32index_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %[[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, u32, index_u32, )(pg, base, indices);
+}
+
+svuint64_t test_svld1uh_gather_u64index_u64(svbool_t pg, const uint16_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svld1uh_gather_u64index_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %[[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather_, u64, index_u64, )(pg, base, indices);
+}
+
+svint32_t test_svld1uh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1uh_gather_u32base_index_s32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather, _u32base, _index_s32, )(pg, bases, index);
+}
+
+svint64_t test_svld1uh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1uh_gather_u64base_index_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint32_t test_svld1uh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1uh_gather_u32base_index_u32
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+  // CHECK: %[[LOAD:.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 4 x i16> %[[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather, _u32base, _index_u32, )(pg, bases, index);
+}
+
+svuint64_t test_svld1uh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1uh_gather_u64base_index_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i16> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uh_gather, _u64base, _index_u64, )(pg, bases, index);
+}
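
(One thing the checks above encode: the "index" forms scale by the element
size, hence the `shl i64 %index, 1` for 16-bit elements, while the "offset"
forms are byte-addressed. A sketch under the same assumptions:)

  #include <arm_sve.h>

  // Lane i reads base[indices[i]] (a uint16_t): indices count elements,
  // not bytes, and each load is zero-extended to 64 bits.
  svuint64_t gather_halfwords(const uint16_t *base, svuint64_t indices) {
    svbool_t pg = svptrue_b64();
    return svld1uh_gather_u64index_u64(pg, base, indices);
  }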

diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
index 1c6275712611..b2f21dc66d3b 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
@@ -55,3 +55,131 @@ svuint64_t test_svld1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum
   // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
   return svld1uw_vnum_u64(pg, base, vnum);
 }
+
+svint64_t test_svld1uw_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1uw_gather_u64base_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint64_t test_svld1uw_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svld1uw_gather_u64base_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint64_t test_svld1uw_gather_s64offset_s64(svbool_t pg, const uint32_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1uw_gather_s64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svld1uw_gather_s64offset_u64(svbool_t pg, const uint32_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svld1uw_gather_s64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint64_t test_svld1uw_gather_u64offset_s64(svbool_t pg, const uint32_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1uw_gather_u64offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svld1uw_gather_u64offset_u64(svbool_t pg, const uint32_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svld1uw_gather_u64offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint64_t test_svld1uw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1uw_gather_u64base_offset_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint64_t test_svld1uw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svld1uw_gather_u64base_offset_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svint64_t test_svld1uw_gather_s64index_s64(svbool_t pg, const uint32_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svld1uw_gather_s64index_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather_, s64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svld1uw_gather_s64index_u64(svbool_t pg, const uint32_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svld1uw_gather_s64index_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather_, s64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svld1uw_gather_u64index_s64(svbool_t pg, const uint32_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svld1uw_gather_u64index_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather_, u64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svld1uw_gather_u64index_u64(svbool_t pg, const uint32_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svld1uw_gather_u64index_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %[[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather_, u64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svld1uw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1uw_gather_u64base_index_s64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint64_t test_svld1uw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svld1uw_gather_u64base_index_u64
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+  // CHECK: %[[LOAD:.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]])
+  // CHECK: %[[ZEXT:.*]] = zext <vscale x 2 x i32> %[[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
+  return SVE_ACLE_FUNC(svld1uw_gather, _u64base, _index_u64, )(pg, bases, index);
+}
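
(Finally, a predicated variant, sketched under the same assumptions:
inactive lanes are not accessed and come back as zero.)

  #include <arm_sve.h>

  // Gather only the first n lanes; the while-lt predicate masks the rest.
  svuint64_t gather_prefix(const uint32_t *base, svuint64_t offsets,
                           int64_t n) {
    svbool_t pg = svwhilelt_b64_s64(0, n);
    return svld1uw_gather_u64offset_u64(pg, base, offsets);
  }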

diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c
index 9b2b3f3117a0..291d7c682501 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c
@@ -225,3 +225,345 @@ svfloat64_t test_svldff1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn
   // CHECK: ret <vscale x 2 x double> %[[LOAD]]
   return SVE_ACLE_FUNC(svldff1_vnum,_f64,,)(pg, base, vnum);
 }
+
+svint32_t test_svldff1_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldff1_gather_u32base_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svldff1_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svldff1_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldff1_gather_u32base_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svldff1_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u64base, _u64, )(pg, bases);
+}
+
+svfloat32_t test_svldff1_gather_u32base_f32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldff1_gather_u32base_f32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret <vscale x 4 x float> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u32base, _f32, )(pg, bases);
+}
+
+svfloat64_t test_svldff1_gather_u64base_f64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1_gather_u64base_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u64base, _f64, )(pg, bases);
+}
+
+svint32_t test_svldff1_gather_s32offset_s32(svbool_t pg, const int32_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svldff1_gather_s32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[PG]], i32* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, s32, offset, _s32)(pg, base, offsets);
+}
+
+svint64_t test_svldff1_gather_s64offset_s64(svbool_t pg, const int64_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, s64, offset, _s64)(pg, base, offsets);
+}
+
+svuint32_t test_svldff1_gather_s32offset_u32(svbool_t pg, const uint32_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svldff1_gather_s32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[PG]], i32* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, s32, offset, _u32)(pg, base, offsets);
+}
+
+svuint64_t test_svldff1_gather_s64offset_u64(svbool_t pg, const uint64_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, s64, offset, _u64)(pg, base, offsets);
+}
+
+svfloat32_t test_svldff1_gather_s32offset_f32(svbool_t pg, const float32_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svldff1_gather_s32offset_f32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4f32(<vscale x 4 x i1> [[PG]], float* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x float> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, s32, offset, _f32)(pg, base, offsets);
+}
+
+svfloat64_t test_svldff1_gather_s64offset_f64(svbool_t pg, const float64_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1_gather_s64offset_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1> [[PG]], double* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, s64, offset, _f64)(pg, base, offsets);
+}
+
+svint32_t test_svldff1_gather_u32offset_s32(svbool_t pg, const int32_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldff1_gather_u32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[PG]], i32* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, u32, offset, _s32)(pg, base, offsets);
+}
+
+svint64_t test_svldff1_gather_u64offset_s64(svbool_t pg, const int64_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, u64, offset, _s64)(pg, base, offsets);
+}
+
+svuint32_t test_svldff1_gather_u32offset_u32(svbool_t pg, const uint32_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldff1_gather_u32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[PG]], i32* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, u32, offset, _u32)(pg, base, offsets);
+}
+
+svuint64_t test_svldff1_gather_u64offset_u64(svbool_t pg, const uint64_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, u64, offset, _u64)(pg, base, offsets);
+}
+
+svfloat32_t test_svldff1_gather_u32offset_f32(svbool_t pg, const float32_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldff1_gather_u32offset_f32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[PG]], float* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x float> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, u32, offset, _f32)(pg, base, offsets);
+}
+
+svfloat64_t test_svldff1_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1_gather_u64offset_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1> [[PG]], double* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, u64, offset, _f64)(pg, base, offsets);
+}
+
+svint32_t test_svldff1_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1_gather_u32base_offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svldff1_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svldff1_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1_gather_u32base_offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svldff1_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svfloat32_t test_svldff1_gather_u32base_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1_gather_u32base_offset_f32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret <vscale x 4 x float> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u32base, _offset_f32, )(pg, bases, offset);
+}
+
+svfloat64_t test_svldff1_gather_u64base_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1_gather_u64base_offset_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u64base, _offset_f64, )(pg, bases, offset);
+}
+
+svint32_t test_svldff1_gather_s32index_s32(svbool_t pg, const int32_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svldff1_gather_s32index_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[PG]], i32* %base, <vscale x 4 x i32> %indices)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, s32, index, _s32)(pg, base, indices);
+}
+
+svint64_t test_svldff1_gather_s64index_s64(svbool_t pg, const int64_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldff1_gather_s64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, s64, index, _s64)(pg, base, indices);
+}
+
+svuint32_t test_svldff1_gather_s32index_u32(svbool_t pg, const uint32_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svldff1_gather_s32index_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[PG]], i32* %base, <vscale x 4 x i32> %indices)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, s32, index, _u32)(pg, base, indices);
+}
+
+svuint64_t test_svldff1_gather_s64index_u64(svbool_t pg, const uint64_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldff1_gather_s64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, s64, index, _u64)(pg, base, indices);
+}
+
+svfloat32_t test_svldff1_gather_s32index_f32(svbool_t pg, const float32_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svldff1_gather_s32index_f32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> [[PG]], float* %base, <vscale x 4 x i32> %indices)
+  // CHECK: ret <vscale x 4 x float> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, s32, index, _f32)(pg, base, indices);
+}
+
+svfloat64_t test_svldff1_gather_s64index_f64(svbool_t pg, const float64_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldff1_gather_s64index_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1> [[PG]], double* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, s64, index, _f64)(pg, base, indices);
+}
+
+svint32_t test_svldff1_gather_u32index_s32(svbool_t pg, const int32_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svldff1_gather_u32index_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[PG]], i32* %base, <vscale x 4 x i32> %indices)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, u32, index, _s32)(pg, base, indices);
+}
+
+svint64_t test_svldff1_gather_u64index_s64(svbool_t pg, const int64_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldff1_gather_u64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, u64, index, _s64)(pg, base, indices);
+}
+
+svuint32_t test_svldff1_gather_u32index_u32(svbool_t pg, const uint32_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svldff1_gather_u32index_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[PG]], i32* %base, <vscale x 4 x i32> %indices)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, u32, index, _u32)(pg, base, indices);
+}
+
+svuint64_t test_svldff1_gather_u64index_u64(svbool_t pg, const uint64_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldff1_gather_u64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, u64, index, _u64)(pg, base, indices);
+}
+
+svfloat32_t test_svldff1_gather_u32index_f32(svbool_t pg, const float32_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svldff1_gather_u32index_f32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> [[PG]], float* %base, <vscale x 4 x i32> %indices)
+  // CHECK: ret <vscale x 4 x float> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, u32, index, _f32)(pg, base, indices);
+}
+
+svfloat64_t test_svldff1_gather_u64index_f64(svbool_t pg, const float64_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldff1_gather_u64index_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1> [[PG]], double* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather_, u64, index, _f64)(pg, base, indices);
+}
+
+svint32_t test_svldff1_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1_gather_u32base_index_s32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u32base, _index_s32, )(pg, bases, index);
+}
+
+svint64_t test_svldff1_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1_gather_u64base_index_s64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 3
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint32_t test_svldff1_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1_gather_u32base_index_u32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u32base, _index_u32, )(pg, bases, index);
+}
+
+svuint64_t test_svldff1_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1_gather_u64base_index_u64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 3
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u64base, _index_u64, )(pg, bases, index);
+}
+
+svfloat32_t test_svldff1_gather_u32base_index_f32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1_gather_u32base_index_f32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: ret <vscale x 4 x float> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u32base, _index_f32, )(pg, bases, index);
+}
+
+svfloat64_t test_svldff1_gather_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1_gather_u64base_index_f64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 3
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  return SVE_ACLE_FUNC(svldff1_gather, _u64base, _index_f64, )(pg, bases, index);
+}

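The SVE_ACLE_FUNC macro used throughout these tests (defined at the top of
each test file) pastes a builtin name together from up to four fragments, so
a single test body exercises both the suffixed and the overloaded ACLE
spelling. As a minimal sketch of how one of the calls above resolves (the two
names follow mechanically from the macro definition; the wrapper function
itself is illustrative only):

  #include <arm_sve.h>

  svfloat32_t gather_row(svbool_t pg, svuint32_t bases, int64_t row) {
    // Default build, A1##A2##A3##A4:
    //   svldff1_gather_u32base_index_f32(pg, bases, row)
    // -DSVE_OVERLOADED_FORMS build, A1##A3:
    //   svldff1_gather_index_f32(pg, bases, row)
    // Either spelling lowers to the scalar.offset intrinsic with the index
    // pre-scaled by sizeof(float32_t), i.e. the 'shl i64 %index, 2' above.
    return svldff1_gather_u32base_index_f32(pg, bases, row);
  }
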
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c
index c32ba7a218c1..6d5a964e1cf7 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c
@@ -141,3 +141,147 @@ svuint64_t test_svldff1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum
   // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
   return svldff1sb_vnum_u64(pg, base, vnum);
 }
+
+svint32_t test_svldff1sb_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldff1sb_gather_u32base_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svldff1sb_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1sb_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svldff1sb_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldff1sb_gather_u32base_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svldff1sb_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1sb_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint32_t test_svldff1sb_gather_s32offset_s32(svbool_t pg, const int8_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svldff1sb_gather_s32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather_, s32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svldff1sb_gather_s64offset_s64(svbool_t pg, const int8_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1sb_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svldff1sb_gather_s32offset_u32(svbool_t pg, const int8_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svldff1sb_gather_s32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather_, s32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svldff1sb_gather_s64offset_u64(svbool_t pg, const int8_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1sb_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldff1sb_gather_u32offset_s32(svbool_t pg, const int8_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldff1sb_gather_u32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather_, u32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svldff1sb_gather_u64offset_s64(svbool_t pg, const int8_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1sb_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svldff1sb_gather_u32offset_u32(svbool_t pg, const int8_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldff1sb_gather_u32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather_, u32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svldff1sb_gather_u64offset_u64(svbool_t pg, const int8_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1sb_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldff1sb_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1sb_gather_u32base_offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svldff1sb_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1sb_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svldff1sb_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1sb_gather_u32base_offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svldff1sb_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1sb_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sb_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}

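The svldff1sb tests above show the structure of an extending gather at the
IR level: the load is performed at the narrow type (<vscale x N x i8>) and
the widening to the container type is an explicit sext, which the backend is
then free to fold back into the extending form of the instruction. A rough
per-lane model of the semantics, as an illustrative assumption (the real
builtin is first-faulting and updates the FFR on faulting lanes, which this
does not capture):

  #include <stdint.h>

  static inline int32_t ldff1sb_lane(const int8_t *base, uint32_t offset) {
    // '_offset' gathers take byte offsets, so no scaling is applied;
    // compare the absence of '.index' in the intrinsic names above.
    return (int32_t)base[offset];
  }
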
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c
index 4921a408eaf3..3c60dcb8b295 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c
@@ -1,7 +1,15 @@
 // RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 
 #include <arm_sve.h>
 
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
 svint32_t test_svldff1sh_s32(svbool_t pg, const int16_t *base)
 {
   // CHECK-LABEL: test_svldff1sh_s32
@@ -89,3 +97,259 @@ svuint64_t test_svldff1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnu
   // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
   return svldff1sh_vnum_u64(pg, base, vnum);
 }
+
+svint32_t test_svldff1sh_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldff1sh_gather_u32base_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svldff1sh_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1sh_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svldff1sh_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldff1sh_gather_u32base_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svldff1sh_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1sh_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint32_t test_svldff1sh_gather_s32offset_s32(svbool_t pg, const int16_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svldff1sh_gather_s32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, s32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svldff1sh_gather_s64offset_s64(svbool_t pg, const int16_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1sh_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svldff1sh_gather_s32offset_u32(svbool_t pg, const int16_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svldff1sh_gather_s32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, s32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svldff1sh_gather_s64offset_u64(svbool_t pg, const int16_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1sh_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldff1sh_gather_u32offset_s32(svbool_t pg, const int16_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldff1sh_gather_u32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, u32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svldff1sh_gather_u64offset_s64(svbool_t pg, const int16_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1sh_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svldff1sh_gather_u32offset_u32(svbool_t pg, const int16_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldff1sh_gather_u32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, u32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svldff1sh_gather_u64offset_u64(svbool_t pg, const int16_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1sh_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldff1sh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1sh_gather_u32base_offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svldff1sh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1sh_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svldff1sh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1sh_gather_u32base_offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svldff1sh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1sh_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svint32_t test_svldff1sh_gather_s32index_s32(svbool_t pg, const int16_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svldff1sh_gather_s32index_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, s32, index_s32, )(pg, base, indices);
+}
+
+svint64_t test_svldff1sh_gather_s64index_s64(svbool_t pg, const int16_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldff1sh_gather_s64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, s64, index_s64, )(pg, base, indices);
+}
+
+svuint32_t test_svldff1sh_gather_s32index_u32(svbool_t pg, const int16_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svldff1sh_gather_s32index_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, s32, index_u32, )(pg, base, indices);
+}
+
+svuint64_t test_svldff1sh_gather_s64index_u64(svbool_t pg, const int16_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldff1sh_gather_s64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, s64, index_u64, )(pg, base, indices);
+}
+
+svint32_t test_svldff1sh_gather_u32index_s32(svbool_t pg, const int16_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svldff1sh_gather_u32index_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, u32, index_s32, )(pg, base, indices);
+}
+
+svint64_t test_svldff1sh_gather_u64index_s64(svbool_t pg, const int16_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldff1sh_gather_u64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, u64, index_s64, )(pg, base, indices);
+}
+
+svuint32_t test_svldff1sh_gather_u32index_u32(svbool_t pg, const int16_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svldff1sh_gather_u32index_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, u32, index_u32, )(pg, base, indices);
+}
+
+svuint64_t test_svldff1sh_gather_u64index_u64(svbool_t pg, const int16_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldff1sh_gather_u64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather_, u64, index_u64, )(pg, base, indices);
+}
+
+svint32_t test_svldff1sh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1sh_gather_u32base_index_s32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather, _u32base, _index_s32, )(pg, bases, index);
+}
+
+svint64_t test_svldff1sh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1sh_gather_u64base_index_s64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint32_t test_svldff1sh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1sh_gather_u32base_index_u32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather, _u32base, _index_u32, )(pg, bases, index);
+}
+
+svuint64_t test_svldff1sh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1sh_gather_u64base_index_u64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sh_gather, _u64base, _index_u64, )(pg, bases, index);
+}

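Note that only the vector-base '_index' forms need extra IR here: there is a
single scalar-offset intrinsic per element type, so the element index is
turned into a byte offset up front, which is what the CHECK-DAG shl lines
verify. The relation they encode, as a sketch rather than library code:

  #include <stdint.h>

  static inline int64_t index_to_byte_offset(int64_t index, int64_t log2_elt) {
    // For svldff1sh the element is 16 bits wide, so log2_elt == 1
    // ('shl i64 %index, 1'); the 32-bit loads earlier in the patch use 2
    // and the 64-bit loads use 3.
    return index << log2_elt;
  }
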
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c
index 7b65d32d1315..6c65979c32d4 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c
@@ -1,7 +1,15 @@
 // RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 
 #include <arm_sve.h>
 
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
 svint64_t test_svldff1sw_s64(svbool_t pg, const int32_t *base)
 {
   // CHECK-LABEL: test_svldff1sw_s64
@@ -45,3 +53,131 @@ svuint64_t test_svldff1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnu
   // CHECK: ret <vscale x 2 x i64> %[[SEXT]]
   return svldff1sw_vnum_u64(pg, base, vnum);
 }
+
+svint64_t test_svldff1sw_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1sw_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint64_t test_svldff1sw_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1sw_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint64_t test_svldff1sw_gather_s64offset_s64(svbool_t pg, const int32_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1sw_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svldff1sw_gather_s64offset_u64(svbool_t pg, const int32_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1sw_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint64_t test_svldff1sw_gather_u64offset_s64(svbool_t pg, const int32_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1sw_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svldff1sw_gather_u64offset_u64(svbool_t pg, const int32_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1sw_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint64_t test_svldff1sw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1sw_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint64_t test_svldff1sw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1sw_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svint64_t test_svldff1sw_gather_s64index_s64(svbool_t pg, const int32_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldff1sw_gather_s64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather_, s64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svldff1sw_gather_s64index_u64(svbool_t pg, const int32_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldff1sw_gather_s64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather_, s64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svldff1sw_gather_u64index_s64(svbool_t pg, const int32_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldff1sw_gather_u64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather_, u64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svldff1sw_gather_u64index_u64(svbool_t pg, const int32_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldff1sw_gather_u64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather_, u64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svldff1sw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1sw_gather_u64base_index_s64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint64_t test_svldff1sw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1sw_gather_u64base_index_u64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  return SVE_ACLE_FUNC(svldff1sw_gather, _u64base, _index_u64, )(pg, bases, index);
+}

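All of the svldff1sw tests above use 64-bit element types, since a
sign-extended word can only widen into a doubleword; hence the nxv2
predicates and <vscale x 2 x i32> loads throughout. A hedged per-lane model
(illustrative only; first-faulting behaviour is again not modelled):

  #include <stdint.h>

  static inline int64_t ldff1sw_lane(const int32_t *base, uint64_t index) {
    // The '_index' form scales by sizeof(int32_t), matching the
    // 'ldff1.gather.index.nxv2i32' plus 'sext' pair in the CHECK lines.
    return (int64_t)base[index];
  }
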
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c
index c098adde24e9..a6c117970877 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c
@@ -1,7 +1,15 @@
 // RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 
 #include <arm_sve.h>
 
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
 svint16_t test_svldff1ub_s16(svbool_t pg, const uint8_t *base)
 {
   // CHECK-LABEL: test_svldff1ub_s16
@@ -133,3 +141,147 @@ svuint64_t test_svldff1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnu
   // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
   return svldff1ub_vnum_u64(pg, base, vnum);
 }
+
+svint32_t test_svldff1ub_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldff1ub_gather_u32base_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svldff1ub_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1ub_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svldff1ub_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldff1ub_gather_u32base_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svldff1ub_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1ub_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint32_t test_svldff1ub_gather_s32offset_s32(svbool_t pg, const uint8_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svldff1ub_gather_s32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
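+  // (32-bit offset vectors select the .sxtw/.uxtw intrinsic variants, which
+  // sign- or zero-extend each offset to 64 bits; 64-bit offset vectors use
+  // the unextended gather form, as in the s64offset tests below.)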
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather_, s32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svldff1ub_gather_s64offset_s64(svbool_t pg, const uint8_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1ub_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svldff1ub_gather_s32offset_u32(svbool_t pg, const uint8_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svldff1ub_gather_s32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather_, s32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svldff1ub_gather_s64offset_u64(svbool_t pg, const uint8_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1ub_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldff1ub_gather_u32offset_s32(svbool_t pg, const uint8_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldff1ub_gather_u32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather_, u32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svldff1ub_gather_u64offset_s64(svbool_t pg, const uint8_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1ub_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svldff1ub_gather_u32offset_u32(svbool_t pg, const uint8_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldff1ub_gather_u32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather_, u32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svldff1ub_gather_u64offset_u64(svbool_t pg, const uint8_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1ub_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldff1ub_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1ub_gather_u32base_offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svldff1ub_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1ub_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svldff1ub_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1ub_gather_u32base_offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svldff1ub_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1ub_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1ub_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}

diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c
index fd5211e6c831..b335f3184d4c 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c
@@ -1,7 +1,15 @@
 // RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 
 #include <arm_sve.h>
 
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
 svint32_t test_svldff1uh_s32(svbool_t pg, const uint16_t *base)
 {
   // CHECK-LABEL: test_svldff1uh_s32
@@ -89,3 +97,259 @@ svuint64_t test_svldff1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vn
   // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
   return svldff1uh_vnum_u64(pg, base, vnum);
 }
+
+svint32_t test_svldff1uh_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldff1uh_gather_u32base_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svldff1uh_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1uh_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svldff1uh_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldff1uh_gather_u32base_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svldff1uh_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1uh_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint32_t test_svldff1uh_gather_s32offset_s32(svbool_t pg, const uint16_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svldff1uh_gather_s32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, s32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svldff1uh_gather_s64offset_s64(svbool_t pg, const uint16_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1uh_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svldff1uh_gather_s32offset_u32(svbool_t pg, const uint16_t *base, svint32_t offsets) {
+  // CHECK-LABEL: test_svldff1uh_gather_s32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, s32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svldff1uh_gather_s64offset_u64(svbool_t pg, const uint16_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1uh_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldff1uh_gather_u32offset_s32(svbool_t pg, const uint16_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldff1uh_gather_u32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, u32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svldff1uh_gather_u64offset_s64(svbool_t pg, const uint16_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1uh_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svldff1uh_gather_u32offset_u32(svbool_t pg, const uint16_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldff1uh_gather_u32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, u32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svldff1uh_gather_u64offset_u64(svbool_t pg, const uint16_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1uh_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldff1uh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1uh_gather_u32base_offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svldff1uh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1uh_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svldff1uh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1uh_gather_u32base_offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svldff1uh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1uh_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svint32_t test_svldff1uh_gather_s32index_s32(svbool_t pg, const uint16_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svldff1uh_gather_s32index_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, s32, index_s32, )(pg, base, indices);
+}
+
+svint64_t test_svldff1uh_gather_s64index_s64(svbool_t pg, const uint16_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldff1uh_gather_s64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, s64, index_s64, )(pg, base, indices);
+}
+
+svuint32_t test_svldff1uh_gather_s32index_u32(svbool_t pg, const uint16_t *base, svint32_t indices) {
+  // CHECK-LABEL: test_svldff1uh_gather_s32index_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, s32, index_u32, )(pg, base, indices);
+}
+
+svuint64_t test_svldff1uh_gather_s64index_u64(svbool_t pg, const uint16_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldff1uh_gather_s64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, s64, index_u64, )(pg, base, indices);
+}
+
+svint32_t test_svldff1uh_gather_u32index_s32(svbool_t pg, const uint16_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svldff1uh_gather_u32index_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, u32, index_s32, )(pg, base, indices);
+}
+
+svint64_t test_svldff1uh_gather_u64index_s64(svbool_t pg, const uint16_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldff1uh_gather_u64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, u64, index_s64, )(pg, base, indices);
+}
+
+svuint32_t test_svldff1uh_gather_u32index_u32(svbool_t pg, const uint16_t *base, svuint32_t indices) {
+  // CHECK-LABEL: test_svldff1uh_gather_u32index_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, u32, index_u32, )(pg, base, indices);
+}
+
+svuint64_t test_svldff1uh_gather_u64index_u64(svbool_t pg, const uint16_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldff1uh_gather_u64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather_, u64, index_u64, )(pg, base, indices);
+}
+
+svint32_t test_svldff1uh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1uh_gather_u32base_index_s32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather, _u32base, _index_s32, )(pg, bases, index);
+}
+
+svint64_t test_svldff1uh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1uh_gather_u64base_index_s64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint32_t test_svldff1uh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1uh_gather_u32base_index_u32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather, _u32base, _index_u32, )(pg, bases, index);
+}
+
+svuint64_t test_svldff1uh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1uh_gather_u64base_index_u64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uh_gather, _u64base, _index_u64, )(pg, bases, index);
+}

diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c
index bdcfca49afa9..0b47a0630d58 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c
@@ -1,7 +1,15 @@
 // RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 
 #include <arm_sve.h>
 
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
 svint64_t test_svldff1uw_s64(svbool_t pg, const uint32_t *base)
 {
   // CHECK-LABEL: test_svldff1uw_s64
@@ -45,3 +53,131 @@ svuint64_t test_svldff1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vn
   // CHECK: ret <vscale x 2 x i64> %[[ZEXT]]
   return svldff1uw_vnum_u64(pg, base, vnum);
 }
+
+svint64_t test_svldff1uw_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1uw_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint64_t test_svldff1uw_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldff1uw_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint64_t test_svldff1uw_gather_s64offset_s64(svbool_t pg, const uint32_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1uw_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svldff1uw_gather_s64offset_u64(svbool_t pg, const uint32_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldff1uw_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint64_t test_svldff1uw_gather_u64offset_s64(svbool_t pg, const uint32_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1uw_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svldff1uw_gather_u64offset_u64(svbool_t pg, const uint32_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldff1uw_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint64_t test_svldff1uw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1uw_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint64_t test_svldff1uw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldff1uw_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svint64_t test_svldff1uw_gather_s64index_s64(svbool_t pg, const uint32_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldff1uw_gather_s64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather_, s64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svldff1uw_gather_s64index_u64(svbool_t pg, const uint32_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldff1uw_gather_s64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather_, s64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svldff1uw_gather_u64index_s64(svbool_t pg, const uint32_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldff1uw_gather_u64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather_, u64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svldff1uw_gather_u64index_u64(svbool_t pg, const uint32_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldff1uw_gather_u64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather_, u64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svldff1uw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1uw_gather_u64base_index_s64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint64_t test_svldff1uw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldff1uw_gather_u64base_index_u64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  return SVE_ACLE_FUNC(svldff1uw_gather, _u64base, _index_u64, )(pg, bases, index);
+}

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1.c
new file mode 100644
index 000000000000..1f00744afe51
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1.c
@@ -0,0 +1,349 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
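+//
+// The last two RUN lines compile with only +sve (no +sve2), so each SVE2
+// gather builtin used below should be diagnosed as an implicit declaration;
+// the expected-warning/overload-warning annotations verify those diagnostics.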
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svint32_t test_svldnt1_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldnt1_gather_u32base_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
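+  // (svldnt1_gather loads full-width elements, so, unlike the truncating
+  // ldff1ub/ldff1uh forms above, no zext/sext step appears in the IR.)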
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u32base_s32'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svldnt1_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64base_s64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svldnt1_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldnt1_gather_u32base_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u32base_u32'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svldnt1_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64base_u64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u64base, _u64, )(pg, bases);
+}
+
+svfloat32_t test_svldnt1_gather_u32base_f32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldnt1_gather_u32base_f32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret <vscale x 4 x float> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_f32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u32base_f32'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u32base, _f32, )(pg, bases);
+}
+
+svfloat64_t test_svldnt1_gather_u64base_f64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1_gather_u64base_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_f64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64base_f64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u64base, _f64, )(pg, bases);
+}
+
+svint64_t test_svldnt1_gather_s64offset_s64(svbool_t pg, const int64_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_s64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, s64, offset, _s64)(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1_gather_s64offset_u64(svbool_t pg, const uint64_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_s64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, s64, offset, _u64)(pg, base, offsets);
+}
+
+svfloat64_t test_svldnt1_gather_s64offset_f64(svbool_t pg, const float64_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1_gather_s64offset_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1> [[PG]], double* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_s64offset_f64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, s64, offset, _f64)(pg, base, offsets);
+}
+
+svint32_t test_svldnt1_gather_u32offset_s32(svbool_t pg, const int32_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldnt1_gather_u32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[PG]], i32* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u32offset_s32'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, u32, offset, _s32)(pg, base, offsets);
+}
+
+svint64_t test_svldnt1_gather_u64offset_s64(svbool_t pg, const int64_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, u64, offset, _s64)(pg, base, offsets);
+}
+
+svuint32_t test_svldnt1_gather_u32offset_u32(svbool_t pg, const uint32_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldnt1_gather_u32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[PG]], i32* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u32offset_u32'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, u32, offset, _u32)(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1_gather_u64offset_u64(svbool_t pg, const uint64_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, u64, offset, _u64)(pg, base, offsets);
+}
+
+svfloat32_t test_svldnt1_gather_u32offset_f32(svbool_t pg, const float32_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldnt1_gather_u32offset_f32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[PG]], float* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret <vscale x 4 x float> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u32offset_f32'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, u32, offset, _f32)(pg, base, offsets);
+}
+
+svfloat64_t test_svldnt1_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1_gather_u64offset_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1> [[PG]], double* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64offset_f64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, u64, offset, _f64)(pg, base, offsets);
+}
+
+svint32_t test_svldnt1_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1_gather_u32base_offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u32base_offset_s32'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svldnt1_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64base_offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svldnt1_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1_gather_u32base_offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u32base_offset_u32'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svldnt1_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64base_offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svfloat32_t test_svldnt1_gather_u32base_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1_gather_u32base_offset_f32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret <vscale x 4 x float> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset_f32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u32base_offset_f32'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u32base, _offset_f32, )(pg, bases, offset);
+}
+
+svfloat64_t test_svldnt1_gather_u64base_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1_gather_u64base_offset_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_offset_f64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64base_offset_f64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u64base, _offset_f64, )(pg, bases, offset);
+}
+
+svint64_t test_svldnt1_gather_s64index_s64(svbool_t pg, const int64_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldnt1_gather_s64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_s64index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, s64, index, _s64)(pg, base, indices);
+}
+
+svuint64_t test_svldnt1_gather_s64index_u64(svbool_t pg, const uint64_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldnt1_gather_s64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_s64index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, s64, index, _u64)(pg, base, indices);
+}
+
+svfloat64_t test_svldnt1_gather_s64index_f64(svbool_t pg, const float64_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldnt1_gather_s64index_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.index.nxv2f64(<vscale x 2 x i1> [[PG]], double* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_s64index_f64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, s64, index, _f64)(pg, base, indices);
+}
+
+svint64_t test_svldnt1_gather_u64index_s64(svbool_t pg, const int64_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldnt1_gather_u64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, u64, index, _s64)(pg, base, indices);
+}
+
+svuint64_t test_svldnt1_gather_u64index_u64(svbool_t pg, const uint64_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldnt1_gather_u64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, u64, index, _u64)(pg, base, indices);
+}
+
+svfloat64_t test_svldnt1_gather_u64index_f64(svbool_t pg, const float64_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldnt1_gather_u64index_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.index.nxv2f64(<vscale x 2 x i1> [[PG]], double* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64index_f64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather_, u64, index, _f64)(pg, base, indices);
+}
+
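+// The base+index forms below lower to the scalar-offset intrinsic: the scalar
+// index is first scaled to a byte offset by a left shift of log2(element size
+// in bytes), which the CHECK-DAG lines verify on the shl instructions.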
+svint32_t test_svldnt1_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1_gather_u32base_index_s32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_index_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u32base_index_s32'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u32base, _index_s32, )(pg, bases, index);
+}
+
+svint64_t test_svldnt1_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1_gather_u64base_index_s64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 3
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64base_index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint32_t test_svldnt1_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1_gather_u32base_index_u32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: ret <vscale x 4 x i32> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_index_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u32base_index_u32'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u32base, _index_u32, )(pg, bases, index);
+}
+
+svuint64_t test_svldnt1_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1_gather_u64base_index_u64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 3
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret <vscale x 2 x i64> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64base_index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u64base, _index_u64, )(pg, bases, index);
+}
+
+svfloat32_t test_svldnt1_gather_u32base_index_f32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1_gather_u32base_index_f32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: ret <vscale x 4 x float> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_index_f32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u32base_index_f32'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u32base, _index_f32, )(pg, bases, index);
+}
+
+svfloat64_t test_svldnt1_gather_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1_gather_u64base_index_f64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 3
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret <vscale x 2 x double> [[LOAD]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1_gather_index_f64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1_gather_u64base_index_f64'}}
+  return SVE_ACLE_FUNC(svldnt1_gather, _u64base, _index_f64, )(pg, bases, index);
+}

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sb.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sb.c
new file mode 100644
index 000000000000..cd87d4491bc2
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sb.c
@@ -0,0 +1,167 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
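+// For example, SVE_ACLE_FUNC(svldnt1sb_gather, _u32base, _s32, ) expands to
+// svldnt1sb_gather_u32base_s32 here, and to the overloaded
+// svldnt1sb_gather_s32 when SVE_OVERLOADED_FORMS is defined.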
+
+svint32_t test_svldnt1sb_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldnt1sb_gather_u32base_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_u32base_s32'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svldnt1sb_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1sb_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_u64base_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svldnt1sb_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldnt1sb_gather_u32base_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_u32base_u32'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svldnt1sb_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1sb_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_u64base_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint64_t test_svldnt1sb_gather_s64offset_s64(svbool_t pg, const int8_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1sb_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_s64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1sb_gather_s64offset_u64(svbool_t pg, const int8_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1sb_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_s64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldnt1sb_gather_u32offset_s32(svbool_t pg, const int8_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldnt1sb_gather_u32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_offset_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_u32offset_s32'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather_, u32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svldnt1sb_gather_u64offset_s64(svbool_t pg, const int8_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1sb_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_u64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svldnt1sb_gather_u32offset_u32(svbool_t pg, const int8_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldnt1sb_gather_u32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_offset_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_u32offset_u32'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather_, u32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1sb_gather_u64offset_u64(svbool_t pg, const int8_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1sb_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_u64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldnt1sb_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1sb_gather_u32base_offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_offset_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_u32base_offset_s32'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svldnt1sb_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1sb_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_u64base_offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svldnt1sb_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1sb_gather_u32base_offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_offset_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_u32base_offset_u32'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svldnt1sb_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1sb_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sb_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sb_gather_u64base_offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sb_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sh.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sh.c
new file mode 100644
index 000000000000..a595f70a9572
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sh.c
@@ -0,0 +1,259 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
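+// For example, SVE_ACLE_FUNC(svldnt1sh_gather_, s64, offset_s64, ) expands to
+// svldnt1sh_gather_s64offset_s64 here, and to the overloaded
+// svldnt1sh_gather_offset_s64 when SVE_OVERLOADED_FORMS is defined.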
+
+svint32_t test_svldnt1sh_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u32base_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u32base_s32'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svldnt1sh_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u64base_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svldnt1sh_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u32base_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u32base_u32'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svldnt1sh_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u64base_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint64_t test_svldnt1sh_gather_s64offset_s64(svbool_t pg, const int16_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1sh_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_s64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1sh_gather_s64offset_u64(svbool_t pg, const int16_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1sh_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_s64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldnt1sh_gather_u32offset_s32(svbool_t pg, const int16_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_offset_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u32offset_s32'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather_, u32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svldnt1sh_gather_u64offset_s64(svbool_t pg, const int16_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svldnt1sh_gather_u32offset_u32(svbool_t pg, const int16_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_offset_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u32offset_u32'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather_, u32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1sh_gather_u64offset_u64(svbool_t pg, const int16_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldnt1sh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u32base_offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_offset_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u32base_offset_s32'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svldnt1sh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u64base_offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svldnt1sh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u32base_offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_offset_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u32base_offset_u32'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svldnt1sh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u64base_offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svint64_t test_svldnt1sh_gather_s64index_s64(svbool_t pg, const int16_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldnt1sh_gather_s64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_s64index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather_, s64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svldnt1sh_gather_s64index_u64(svbool_t pg, const int16_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldnt1sh_gather_s64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_s64index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather_, s64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svldnt1sh_gather_u64index_s64(svbool_t pg, const int16_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u64index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather_, u64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svldnt1sh_gather_u64index_u64(svbool_t pg, const int16_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u64index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather_, u64, index_u64, )(pg, base, indices);
+}
+
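+// For the base+index forms below, the index is converted to a byte offset with
+// a left shift by 1 (log2 of the 16-bit in-memory element size) before the
+// scalar-offset intrinsic is called.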
+svint32_t test_svldnt1sh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u32base_index_s32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_index_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u32base_index_s32'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _index_s32, )(pg, bases, index);
+}
+
+svint64_t test_svldnt1sh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u64base_index_s64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u64base_index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint32_t test_svldnt1sh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u32base_index_u32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_index_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u32base_index_u32'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _index_u32, )(pg, bases, index);
+}
+
+svuint64_t test_svldnt1sh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1sh_gather_u64base_index_u64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sh_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sh_gather_u64base_index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _index_u64, )(pg, bases, index);
+}

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sw.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sw.c
new file mode 100644
index 000000000000..921861e4bcff
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sw.c
@@ -0,0 +1,169 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
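+// For example, SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _s64, ) expands to
+// svldnt1sw_gather_u64base_s64 here, and to the overloaded
+// svldnt1sw_gather_s64 when SVE_OVERLOADED_FORMS is defined.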
+
+svint64_t test_svldnt1sw_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1sw_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_u64base_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint64_t test_svldnt1sw_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1sw_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_u64base_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint64_t test_svldnt1sw_gather_s64offset_s64(svbool_t pg, const int32_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1sw_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_s64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1sw_gather_s64offset_u64(svbool_t pg, const int32_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1sw_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_s64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint64_t test_svldnt1sw_gather_u64offset_s64(svbool_t pg, const int32_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1sw_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_u64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1sw_gather_u64offset_u64(svbool_t pg, const int32_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1sw_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_u64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint64_t test_svldnt1sw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1sw_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_u64base_offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint64_t test_svldnt1sw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1sw_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_u64base_offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svint64_t test_svldnt1sw_gather_s64index_s64(svbool_t pg, const int32_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldnt1sw_gather_s64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_s64index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather_, s64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svldnt1sw_gather_s64index_u64(svbool_t pg, const int32_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldnt1sw_gather_s64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_s64index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather_, s64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svldnt1sw_gather_u64index_s64(svbool_t pg, const int32_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldnt1sw_gather_u64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_u64index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather_, u64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svldnt1sw_gather_u64index_u64(svbool_t pg, const int32_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldnt1sw_gather_u64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_u64index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather_, u64, index_u64, )(pg, base, indices);
+}
+
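+// For the base+index forms below, the index is scaled by the 32-bit in-memory
+// element size (shl by 2) to form the byte offset passed to the intrinsic.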
+svint64_t test_svldnt1sw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1sw_gather_u64base_index_s64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_u64base_index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint64_t test_svldnt1sw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1sw_gather_u64base_index_u64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[SEXT:%.*]] = sext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[SEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1sw_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1sw_gather_u64base_index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _index_u64, )(pg, bases, index);
+}

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1ub.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1ub.c
new file mode 100644
index 000000000000..05fbfebb9a97
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1ub.c
@@ -0,0 +1,167 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svint32_t test_svldnt1ub_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldnt1ub_gather_u32base_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_u32base_s32'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svldnt1ub_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1ub_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_u64base_s64'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svldnt1ub_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldnt1ub_gather_u32base_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_u32base_u32'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svldnt1ub_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1ub_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_u64base_u64'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint64_t test_svldnt1ub_gather_s64offset_s64(svbool_t pg, const uint8_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1ub_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_s64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1ub_gather_s64offset_u64(svbool_t pg, const uint8_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1ub_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_s64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldnt1ub_gather_u32offset_s32(svbool_t pg, const uint8_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldnt1ub_gather_u32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_offset_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_u32offset_s32'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather_, u32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svldnt1ub_gather_u64offset_s64(svbool_t pg, const uint8_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1ub_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_u64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svldnt1ub_gather_u32offset_u32(svbool_t pg, const uint8_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldnt1ub_gather_u32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_offset_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_u32offset_u32'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather_, u32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1ub_gather_u64offset_u64(svbool_t pg, const uint8_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1ub_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_u64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldnt1ub_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1ub_gather_u32base_offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_offset_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_u32base_offset_s32'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svldnt1ub_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1ub_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_u64base_offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svldnt1ub_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1ub_gather_u32base_offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i8> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_offset_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_u32base_offset_u32'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svldnt1ub_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1ub_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1ub_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1ub_gather_u64base_offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1ub_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}

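(The ldnt1ub forms load one byte per lane and zero-extend it; with the vector-base variants the scalar argument is a raw byte offset added to every base address, with no scaling. A small sketch of the vector-plus-scalar form, illustrative only; that `bases` already holds valid packed-u8 addresses is an assumption:

  #include <arm_sve.h>

  // Load the byte at bases[i] + 1 for every active lane, zero-extended to u32.
  svuint32_t load_next_byte(svbool_t pg, svuint32_t bases) {
    return svldnt1ub_gather_u32base_offset_u32(pg, bases, 1);
  }
)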
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uh.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uh.c
new file mode 100644
index 000000000000..5627060b878b
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uh.c
@@ -0,0 +1,259 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svint32_t test_svldnt1uh_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u32base_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u32base_s32'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _s32, )(pg, bases);
+}
+
+svint64_t test_svldnt1uh_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u64base_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint32_t test_svldnt1uh_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u32base_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u32base_u32'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _u32, )(pg, bases);
+}
+
+svuint64_t test_svldnt1uh_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u64base_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint64_t test_svldnt1uh_gather_s64offset_s64(svbool_t pg, const uint16_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1uh_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_s64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1uh_gather_s64offset_u64(svbool_t pg, const uint16_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1uh_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_s64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldnt1uh_gather_u32offset_s32(svbool_t pg, const uint16_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_offset_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u32offset_s32'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather_, u32, offset_s32, )(pg, base, offsets);
+}
+
+svint64_t test_svldnt1uh_gather_u64offset_s64(svbool_t pg, const uint16_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint32_t test_svldnt1uh_gather_u32offset_u32(svbool_t pg, const uint16_t *base, svuint32_t offsets) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_offset_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u32offset_u32'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather_, u32, offset_u32, )(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1uh_gather_u64offset_u64(svbool_t pg, const uint16_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint32_t test_svldnt1uh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u32base_offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_offset_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u32base_offset_s32'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _offset_s32, )(pg, bases, offset);
+}
+
+svint64_t test_svldnt1uh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u64base_offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint32_t test_svldnt1uh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u32base_offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_offset_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u32base_offset_u32'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _offset_u32, )(pg, bases, offset);
+}
+
+svuint64_t test_svldnt1uh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u64base_offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svint64_t test_svldnt1uh_gather_s64index_s64(svbool_t pg, const uint16_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldnt1uh_gather_s64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_s64index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather_, s64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svldnt1uh_gather_s64index_u64(svbool_t pg, const uint16_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldnt1uh_gather_s64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_s64index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather_, s64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svldnt1uh_gather_u64index_s64(svbool_t pg, const uint16_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u64index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather_, u64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svldnt1uh_gather_u64index_u64(svbool_t pg, const uint16_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u64index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather_, u64, index_u64, )(pg, base, indices);
+}
+
+svint32_t test_svldnt1uh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u32base_index_s32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_index_s32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u32base_index_s32'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _index_s32, )(pg, bases, index);
+}
+
+svint64_t test_svldnt1uh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u64base_index_s64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u64base_index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint32_t test_svldnt1uh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u32base_index_u32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 4 x i16> [[LOAD]] to <vscale x 4 x i32>
+  // CHECK: ret <vscale x 4 x i32> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_index_u32'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u32base_index_u32'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _index_u32, )(pg, bases, index);
+}
+
+svuint64_t test_svldnt1uh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1uh_gather_u64base_index_u64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i16> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uh_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uh_gather_u64base_index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _index_u64, )(pg, bases, index);
+}

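(Note the index-versus-offset distinction exercised above: for 16-bit elements the index forms scale by sizeof(uint16_t) — the `shl i64 %index, 1` in the CHECK-DAG lines — while the offset forms take raw byte offsets. A hedged sketch of the pointer-plus-index form; the helper name and data layout are hypothetical:

  #include <arm_sve.h>

  // Gather halfwords[idx[i]] per active lane, zero-extending u16 -> u64.
  svuint64_t gather_u16(const uint16_t *halfwords, svuint64_t idx, svbool_t pg) {
    return svldnt1uh_gather_u64index_u64(pg, halfwords, idx);
  }
)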
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uw.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uw.c
new file mode 100644
index 000000000000..e64ec26be8de
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uw.c
@@ -0,0 +1,169 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svint64_t test_svldnt1uw_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1uw_gather_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_u64base_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather, _u64base, _s64, )(pg, bases);
+}
+
+svuint64_t test_svldnt1uw_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
+  // CHECK-LABEL: test_svldnt1uw_gather_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_u64base_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather, _u64base, _u64, )(pg, bases);
+}
+
+svint64_t test_svldnt1uw_gather_s64offset_s64(svbool_t pg, const uint32_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1uw_gather_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_s64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather_, s64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1uw_gather_s64offset_u64(svbool_t pg, const uint32_t *base, svint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1uw_gather_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_s64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather_, s64, offset_u64, )(pg, base, offsets);
+}
+
+svint64_t test_svldnt1uw_gather_u64offset_s64(svbool_t pg, const uint32_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1uw_gather_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_u64offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather_, u64, offset_s64, )(pg, base, offsets);
+}
+
+svuint64_t test_svldnt1uw_gather_u64offset_u64(svbool_t pg, const uint32_t *base, svuint64_t offsets) {
+  // CHECK-LABEL: test_svldnt1uw_gather_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_u64offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather_, u64, offset_u64, )(pg, base, offsets);
+}
+
+svint64_t test_svldnt1uw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1uw_gather_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_offset_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_u64base_offset_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather, _u64base, _offset_s64, )(pg, bases, offset);
+}
+
+svuint64_t test_svldnt1uw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
+  // CHECK-LABEL: test_svldnt1uw_gather_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_offset_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_u64base_offset_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather, _u64base, _offset_u64, )(pg, bases, offset);
+}
+
+svint64_t test_svldnt1uw_gather_s64index_s64(svbool_t pg, const uint32_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldnt1uw_gather_s64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_s64index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather_, s64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svldnt1uw_gather_s64index_u64(svbool_t pg, const uint32_t *base, svint64_t indices) {
+  // CHECK-LABEL: test_svldnt1uw_gather_s64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_s64index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather_, s64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svldnt1uw_gather_u64index_s64(svbool_t pg, const uint32_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldnt1uw_gather_u64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_u64index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather_, u64, index_s64, )(pg, base, indices);
+}
+
+svuint64_t test_svldnt1uw_gather_u64index_u64(svbool_t pg, const uint32_t *base, svuint64_t indices) {
+  // CHECK-LABEL: test_svldnt1uw_gather_u64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_u64index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather_, u64, index_u64, )(pg, base, indices);
+}
+
+svint64_t test_svldnt1uw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1uw_gather_u64base_index_s64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_index_s64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_u64base_index_s64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather, _u64base, _index_s64, )(pg, bases, index);
+}
+
+svuint64_t test_svldnt1uw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
+  // CHECK-LABEL: test_svldnt1uw_gather_u64base_index_u64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: [[LOAD:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: [[ZEXT:%.*]] = zext <vscale x 2 x i32> [[LOAD]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> [[ZEXT]]
+  // overload-warning@+2 {{implicit declaration of function 'svldnt1uw_gather_index_u64'}}
+  // expected-warning@+1 {{implicit declaration of function 'svldnt1uw_gather_u64base_index_u64'}}
+  return SVE_ACLE_FUNC(svldnt1uw_gather, _u64base, _index_u64, )(pg, bases, index);
+}

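(svldnt1uw zero-extends 32-bit words to 64 bits, so this file only has doubleword-lane forms — every intrinsic above produces <vscale x 2 x i64>. A minimal sketch of the vector-base variant, illustrative only; that `bases` holds valid word addresses is an assumption:

  #include <arm_sve.h>

  // Load a u32 from each address in bases, zero-extended to u64.
  svuint64_t load_words(svbool_t pg, svuint64_t bases) {
    return svldnt1uw_gather_u64base_u64(pg, bases);
  }
)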
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1.c
new file mode 100644
index 000000000000..e088836648c1
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1.c
@@ -0,0 +1,349 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+void test_svstnt1_scatter_u32base_s32(svbool_t pg, svuint32_t bases, svint32_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u32base_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u32base_s32'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u32base, , _s32)(pg, bases, data);
+}
+
+void test_svstnt1_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64base_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64base_s64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u64base, , _s64)(pg, bases, data);
+}
+
+void test_svstnt1_scatter_u32base_u32(svbool_t pg, svuint32_t bases, svuint32_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u32base_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u32base_u32'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u32base, , _u32)(pg, bases, data);
+}
+
+void test_svstnt1_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64base_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64base_u64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u64base, , _u64)(pg, bases, data);
+}
+
+void test_svstnt1_scatter_u32base_f32(svbool_t pg, svuint32_t bases, svfloat32_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u32base_f32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u32base_f32'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u32base, , _f32)(pg, bases, data);
+}
+
+void test_svstnt1_scatter_u64base_f64(svbool_t pg, svuint64_t bases, svfloat64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64base_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64base_f64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u64base, , _f64)(pg, bases, data);
+}
+
+void test_svstnt1_scatter_s64offset_s64(svbool_t pg, int64_t *base, svint64_t offsets, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_s64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_s64offset_s64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, s64, offset, _s64)(pg, base, offsets, data);
+}
+
+void test_svstnt1_scatter_s64offset_u64(svbool_t pg, uint64_t *base, svint64_t offsets, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_s64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_s64offset_u64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, s64, offset, _u64)(pg, base, offsets, data);
+}
+
+void test_svstnt1_scatter_s64offset_f64(svbool_t pg, float64_t *base, svint64_t offsets, svfloat64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_s64offset_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> [[PG]], double* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_s64offset_f64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, s64, offset, _f64)(pg, base, offsets, data);
+}
+
+void test_svstnt1_scatter_u32offset_s32(svbool_t pg, int32_t *base, svuint32_t offsets, svint32_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u32offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> [[PG]], i32* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u32offset_s32'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, u32, offset, _s32)(pg, base, offsets, data);
+}
+
+void test_svstnt1_scatter_u64offset_s64(svbool_t pg, int64_t *base, svuint64_t offsets, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64offset_s64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, u64, offset, _s64)(pg, base, offsets, data);
+}
+
+void test_svstnt1_scatter_u32offset_u32(svbool_t pg, uint32_t *base, svuint32_t offsets, svuint32_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u32offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> [[PG]], i32* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u32offset_u32'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, u32, offset, _u32)(pg, base, offsets, data);
+}
+
+void test_svstnt1_scatter_u64offset_u64(svbool_t pg, uint64_t *base, svuint64_t offsets, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64offset_u64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, u64, offset, _u64)(pg, base, offsets, data);
+}
+
+void test_svstnt1_scatter_u32offset_f32(svbool_t pg, float32_t *base, svuint32_t offsets, svfloat32_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u32offset_f32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> [[PG]], float* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u32offset_f32'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, u32, offset, _f32)(pg, base, offsets, data);
+}
+
+void test_svstnt1_scatter_u64offset_f64(svbool_t pg, float64_t *base, svuint64_t offsets, svfloat64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64offset_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> [[PG]], double* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64offset_f64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, u64, offset, _f64)(pg, base, offsets, data);
+}
+
+void test_svstnt1_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u32base_offset_s32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u32base_offset_s32'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _offset, _s32)(pg, bases, offset, data);
+}
+
+void test_svstnt1_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64base_offset_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64base_offset_s64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _offset, _s64)(pg, bases, offset, data);
+}
+
+void test_svstnt1_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u32base_offset_u32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u32base_offset_u32'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _offset, _u32)(pg, bases, offset, data);
+}
+
+void test_svstnt1_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64base_offset_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64base_offset_u64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _offset, _u64)(pg, bases, offset, data);
+}
+
+void test_svstnt1_scatter_u32base_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset, svfloat32_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u32base_offset_f32
+  // CHECK: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u32base_offset_f32'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _offset, _f32)(pg, bases, offset, data);
+}
+
+void test_svstnt1_scatter_u64base_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset, svfloat64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64base_offset_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64base_offset_f64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _offset, _f64)(pg, bases, offset, data);
+}
+
+void test_svstnt1_scatter_s64index_s64(svbool_t pg, int64_t *base, svint64_t indices, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_s64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_s64index_s64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, s64, index, _s64)(pg, base, indices, data);
+}
+
+void test_svstnt1_scatter_s64index_u64(svbool_t pg, uint64_t *base, svint64_t indices, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_s64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_s64index_u64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, s64, index, _u64)(pg, base, indices, data);
+}
+
+void test_svstnt1_scatter_s64index_f64(svbool_t pg, float64_t *base, svint64_t indices, svfloat64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_s64index_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> [[PG]], double* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_s64index_f64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, s64, index, _f64)(pg, base, indices, data);
+}
+
+void test_svstnt1_scatter_u64index_s64(svbool_t pg, int64_t *base, svuint64_t indices, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64index_s64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64index_s64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, u64, index, _s64)(pg, base, indices, data);
+}
+
+void test_svstnt1_scatter_u64index_u64(svbool_t pg, uint64_t *base, svuint64_t indices, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64index_u64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], i64* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64index_u64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, u64, index, _u64)(pg, base, indices, data);
+}
+
+void test_svstnt1_scatter_u64index_f64(svbool_t pg, float64_t *base, svuint64_t indices, svfloat64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64index_f64
+  // CHECK: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> [[PG]], double* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64index_f64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter_, u64, index, _f64)(pg, base, indices, data);
+}
+
+void test_svstnt1_scatter_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index, svint32_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u32base_index_s32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u32base_index_s32'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _index, _s32)(pg, bases, index, data);
+}
+
+void test_svstnt1_scatter_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64base_index_s64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 3
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64base_index_s64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _index, _s64)(pg, bases, index, data);
+}
+
+void test_svstnt1_scatter_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index, svuint32_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u32base_index_u32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u32base_index_u32'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _index, _u32)(pg, bases, index, data);
+}
+
+void test_svstnt1_scatter_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64base_index_u64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 3
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64base_index_u64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _index, _u64)(pg, bases, index, data);
+}
+
+void test_svstnt1_scatter_u32base_index_f32(svbool_t pg, svuint32_t bases, int64_t index, svfloat32_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u32base_index_f32
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u32base_index_f32'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _index, _f32)(pg, bases, index, data);
+}
+
+void test_svstnt1_scatter_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t index, svfloat64_t data) {
+  // CHECK-LABEL: test_svstnt1_scatter_u64base_index_f64
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 3
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1_scatter_u64base_index_f64'}}
+  return SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _index, _f64)(pg, bases, index, data);
+}

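As a quick orientation for the tests above: the "offset" forms take raw byte offsets, while the "index" forms scale by the element size, which is where the `shl i64 %index, 2` / `shl i64 %index, 3` feeding the scalar-offset intrinsic comes from. A minimal usage sketch, not part of the patch -- the helper names, the all-true predicate and building for an SVE2-enabled target (e.g. -march=armv8-a+sve2) are illustrative assumptions:

  #include <arm_sve.h>

  // Store data[i] to base[indices[i]]. The "index" form scales each index
  // by sizeof(float64_t), so it matches the "offset" form below with
  // offsets = indices << 3.
  void scatter_f64(float64_t *base, svint64_t indices, svfloat64_t data) {
    svbool_t pg = svptrue_b64();
    svstnt1_scatter_s64index_f64(pg, base, indices, data);
  }

  // The same store written with explicit byte offsets.
  void scatter_f64_bytes(float64_t *base, svint64_t indices, svfloat64_t data) {
    svbool_t pg = svptrue_b64();
    svint64_t offsets = svlsl_n_s64_x(pg, indices, 3); // indices * 8 bytes
    svstnt1_scatter_s64offset_f64(pg, base, offsets, data);
  }
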
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1b.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1b.c
new file mode 100644
index 000000000000..d94d6b4f34e3
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1b.c
@@ -0,0 +1,167 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple "used, unused, ..." macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+void test_svstnt1b_scatter_u32base_s32(svbool_t pg, svuint32_t bases, svint32_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_u32base_s32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> [[TRUNC]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_u32base_s32'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter, _u32base, , _s32)(pg, bases, data);
+}
+
+void test_svstnt1b_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_u64base_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_u64base_s64'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter, _u64base, , _s64)(pg, bases, data);
+}
+
+void test_svstnt1b_scatter_u32base_u32(svbool_t pg, svuint32_t bases, svuint32_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_u32base_u32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> [[TRUNC]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_u32base_u32'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter, _u32base, , _u32)(pg, bases, data);
+}
+
+void test_svstnt1b_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_u64base_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_u64base_u64'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter, _u64base, , _u64)(pg, bases, data);
+}
+
+void test_svstnt1b_scatter_s64offset_s64(svbool_t pg, int8_t *base, svint64_t offsets, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_s64offset_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TRUNC]], <vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_s64offset_s64'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter_, s64, offset, _s64)(pg, base, offsets, data);
+}
+
+void test_svstnt1b_scatter_s64offset_u64(svbool_t pg, uint8_t *base, svint64_t offsets, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_s64offset_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TRUNC]], <vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_s64offset_u64'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter_, s64, offset, _u64)(pg, base, offsets, data);
+}
+
+void test_svstnt1b_scatter_u32offset_s32(svbool_t pg, int8_t *base, svuint32_t offsets, svint32_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_u32offset_s32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TRUNC]], <vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_u32offset_s32'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter_, u32, offset, _s32)(pg, base, offsets, data);
+}
+
+void test_svstnt1b_scatter_u64offset_s64(svbool_t pg, int8_t *base, svuint64_t offsets, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_u64offset_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TRUNC]], <vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_u64offset_s64'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter_, u64, offset, _s64)(pg, base, offsets, data);
+}
+
+void test_svstnt1b_scatter_u32offset_u32(svbool_t pg, uint8_t *base, svuint32_t offsets, svuint32_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_u32offset_u32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TRUNC]], <vscale x 4 x i1> [[PG]], i8* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_u32offset_u32'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter_, u32, offset, _u32)(pg, base, offsets, data);
+}
+
+void test_svstnt1b_scatter_u64offset_u64(svbool_t pg, uint8_t *base, svuint64_t offsets, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_u64offset_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TRUNC]], <vscale x 2 x i1> [[PG]], i8* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_u64offset_u64'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter_, u64, offset, _u64)(pg, base, offsets, data);
+}
+
+void test_svstnt1b_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_u32base_offset_s32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> [[TRUNC]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_u32base_offset_s32'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter, _u32base, _offset, _s32)(pg, bases, offset, data);
+}
+
+void test_svstnt1b_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_u64base_offset_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_u64base_offset_s64'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter, _u64base, _offset, _s64)(pg, bases, offset, data);
+}
+
+void test_svstnt1b_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_u32base_offset_u32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> [[TRUNC]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_u32base_offset_u32'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter, _u32base, _offset, _u32)(pg, bases, offset, data);
+}
+
+void test_svstnt1b_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1b_scatter_u64base_offset_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1b_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1b_scatter_u64base_offset_u64'}}
+  return SVE_ACLE_FUNC(svstnt1b_scatter, _u64base, _offset, _u64)(pg, bases, offset, data);
+}

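The svstnt1b tests above exercise the truncating byte scatters: the data vector keeps its 32- or 64-bit lane layout, but only the low byte of each active lane reaches memory, hence the trunc to nxv4i8/nxv2i8 in every check. A short sketch under the same illustrative assumptions (the helper name is not from the patch):

  #include <arm_sve.h>

  // Scatter the low byte of each active 32-bit lane to base[offsets[i]];
  // the truncation from 32 to 8 bits is implicit in the intrinsic.
  void store_low_bytes(uint8_t *base, svuint32_t offsets, svuint32_t data) {
    svbool_t pg = svptrue_b32();
    svstnt1b_scatter_u32offset_u32(pg, base, offsets, data);
  }
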
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1h.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1h.c
new file mode 100644
index 000000000000..f457ada228f3
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1h.c
@@ -0,0 +1,259 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple "used, unused, ..." macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+void test_svstnt1h_scatter_u32base_s32(svbool_t pg, svuint32_t bases, svint32_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u32base_s32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TRUNC]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u32base_s32'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter, _u32base, , _s32)(pg, bases, data);
+}
+
+void test_svstnt1h_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u64base_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u64base_s64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter, _u64base, , _s64)(pg, bases, data);
+}
+
+void test_svstnt1h_scatter_u32base_u32(svbool_t pg, svuint32_t bases, svuint32_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u32base_u32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TRUNC]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u32base_u32'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter, _u32base, , _u32)(pg, bases, data);
+}
+
+void test_svstnt1h_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u64base_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u64base_u64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter, _u64base, , _u64)(pg, bases, data);
+}
+
+void test_svstnt1h_scatter_s64offset_s64(svbool_t pg, int16_t *base, svint64_t offsets, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_s64offset_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_s64offset_s64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter_, s64, offset, _s64)(pg, base, offsets, data);
+}
+
+void test_svstnt1h_scatter_s64offset_u64(svbool_t pg, uint16_t *base, svint64_t offsets, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_s64offset_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_s64offset_u64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter_, s64, offset, _u64)(pg, base, offsets, data);
+}
+
+void test_svstnt1h_scatter_u32offset_s32(svbool_t pg, int16_t *base, svuint32_t offsets, svint32_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u32offset_s32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TRUNC]], <vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u32offset_s32'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter_, u32, offset, _s32)(pg, base, offsets, data);
+}
+
+void test_svstnt1h_scatter_u64offset_s64(svbool_t pg, int16_t *base, svuint64_t offsets, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u64offset_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u64offset_s64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter_, u64, offset, _s64)(pg, base, offsets, data);
+}
+
+void test_svstnt1h_scatter_u32offset_u32(svbool_t pg, uint16_t *base, svuint32_t offsets, svuint32_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u32offset_u32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TRUNC]], <vscale x 4 x i1> [[PG]], i16* %base, <vscale x 4 x i32> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u32offset_u32'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter_, u32, offset, _u32)(pg, base, offsets, data);
+}
+
+void test_svstnt1h_scatter_u64offset_u64(svbool_t pg, uint16_t *base, svuint64_t offsets, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u64offset_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u64offset_u64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter_, u64, offset, _u64)(pg, base, offsets, data);
+}
+
+void test_svstnt1h_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u32base_offset_s32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TRUNC]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u32base_offset_s32'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter, _u32base, _offset, _s32)(pg, bases, offset, data);
+}
+
+void test_svstnt1h_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u64base_offset_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u64base_offset_s64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter, _u64base, _offset, _s64)(pg, bases, offset, data);
+}
+
+void test_svstnt1h_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u32base_offset_u32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TRUNC]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u32base_offset_u32'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter, _u32base, _offset, _u32)(pg, bases, offset, data);
+}
+
+void test_svstnt1h_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u64base_offset_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u64base_offset_u64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter, _u64base, _offset, _u64)(pg, bases, offset, data);
+}
+
+void test_svstnt1h_scatter_s64index_s64(svbool_t pg, int16_t *base, svint64_t indices, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_s64index_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_s64index_s64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter_, s64, index, _s64)(pg, base, indices, data);
+}
+
+void test_svstnt1h_scatter_s64index_u64(svbool_t pg, uint16_t *base, svint64_t indices, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_s64index_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_s64index_u64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter_, s64, index, _u64)(pg, base, indices, data);
+}
+
+void test_svstnt1h_scatter_u64index_s64(svbool_t pg, int16_t *base, svuint64_t indices, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u64index_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u64index_s64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter_, u64, index, _s64)(pg, base, indices, data);
+}
+
+void test_svstnt1h_scatter_u64index_u64(svbool_t pg, uint16_t *base, svuint64_t indices, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u64index_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], i16* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u64index_u64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter_, u64, index, _u64)(pg, base, indices, data);
+}
+
+void test_svstnt1h_scatter_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index, svint32_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u32base_index_s32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TRUNC]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u32base_index_s32'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter, _u32base, _index, _s32)(pg, bases, index, data);
+}
+
+void test_svstnt1h_scatter_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u64base_index_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u64base_index_s64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter, _u64base, _index, _s64)(pg, bases, index, data);
+}
+
+void test_svstnt1h_scatter_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index, svuint32_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u32base_index_u32
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TRUNC]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> %bases, i64 [[SHL]])
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u32base_index_u32'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter, _u32base, _index, _u32)(pg, bases, index, data);
+}
+
+void test_svstnt1h_scatter_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1h_scatter_u64base_index_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 1
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1h_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1h_scatter_u64base_index_u64'}}
+  return SVE_ACLE_FUNC(svstnt1h_scatter, _u64base, _index, _u64)(pg, bases, index, data);
+}

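The u32base/u64base forms above invert the addressing scheme: the vector supplies a per-lane base address, and the scalar index -- scaled by the element size, the `shl i64 %index, 1` in the checks -- is shared by all lanes. One more hedged sketch with an illustrative helper name:

  #include <arm_sve.h>

  // Each active lane stores its low 16 bits to the address bases[i] + 2*index.
  void store_halves_at(svuint64_t bases, int64_t index, svuint64_t data) {
    svbool_t pg = svptrue_b64();
    svstnt1h_scatter_u64base_index_u64(pg, bases, index, data);
  }
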
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1w.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1w.c
new file mode 100644
index 000000000000..98f1ecd54407
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1w.c
@@ -0,0 +1,169 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple "used, unused, ..." macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+void test_svstnt1w_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_u64base_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_u64base_s64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, , _s64)(pg, bases, data);
+}
+
+void test_svstnt1w_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_u64base_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 0)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_u64base_u64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, , _u64)(pg, bases, data);
+}
+
+void test_svstnt1w_scatter_s64offset_s64(svbool_t pg, int32_t *base, svint64_t offsets, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_s64offset_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_s64offset_s64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter_, s64, offset, _s64)(pg, base, offsets, data);
+}
+
+void test_svstnt1w_scatter_s64offset_u64(svbool_t pg, uint32_t *base, svint64_t offsets, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_s64offset_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_s64offset_u64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter_, s64, offset, _u64)(pg, base, offsets, data);
+}
+
+void test_svstnt1w_scatter_u64offset_s64(svbool_t pg, int32_t *base, svuint64_t offsets, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_u64offset_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_u64offset_s64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter_, u64, offset, _s64)(pg, base, offsets, data);
+}
+
+void test_svstnt1w_scatter_u64offset_u64(svbool_t pg, uint32_t *base, svuint64_t offsets, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_u64offset_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %offsets)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_u64offset_u64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter_, u64, offset, _u64)(pg, base, offsets, data);
+}
+
+void test_svstnt1w_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_u64base_offset_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_u64base_offset_s64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, _offset, _s64)(pg, bases, offset, data);
+}
+
+void test_svstnt1w_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_u64base_offset_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 %offset)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter_offset'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_u64base_offset_u64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, _offset, _u64)(pg, bases, offset, data);
+}
+
+void test_svstnt1w_scatter_s64index_s64(svbool_t pg, int32_t *base, svint64_t indices, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_s64index_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_s64index_s64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter_, s64, index, _s64)(pg, base, indices, data);
+}
+
+void test_svstnt1w_scatter_s64index_u64(svbool_t pg, uint32_t *base, svint64_t indices, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_s64index_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_s64index_u64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter_, s64, index, _u64)(pg, base, indices, data);
+}
+
+void test_svstnt1w_scatter_u64index_s64(svbool_t pg, int32_t *base, svuint64_t indices, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_u64index_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_u64index_s64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter_, u64, index, _s64)(pg, base, indices, data);
+}
+
+void test_svstnt1w_scatter_u64index_u64(svbool_t pg, uint32_t *base, svuint64_t indices, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_u64index_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], i32* %base, <vscale x 2 x i64> %indices)
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_u64index_u64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter_, u64, index, _u64)(pg, base, indices, data);
+}
+
+void test_svstnt1w_scatter_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_u64base_index_s64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_u64base_index_s64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, _index, _s64)(pg, bases, index, data);
+}
+
+void test_svstnt1w_scatter_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data) {
+  // CHECK-LABEL: test_svstnt1w_scatter_u64base_index_u64
+  // CHECK-DAG: [[TRUNC:%.*]] = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
+  // CHECK-DAG: [[PG:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: [[SHL:%.*]] = shl i64 %index, 2
+  // CHECK: call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TRUNC]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> %bases, i64 [[SHL]])
+  // CHECK: ret void
+  // overload-warning@+2 {{implicit declaration of function 'svstnt1w_scatter_index'}}
+  // expected-warning@+1 {{implicit declaration of function 'svstnt1w_scatter_u64base_index_u64'}}
+  return SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, _index, _u64)(pg, bases, index, data);
+}
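
A note on the two addressing forms exercised above: for this truncating
32-bit scatter, the "_index" variants are scaled by the 4-byte memory
element size (visible as the shl i64 %index, 2 in the CHECK lines),
whereas the "_offset" variants take raw byte offsets. A minimal usage
sketch under that reading; the helper name is illustrative and not part
of the patch:

  #include <arm_sve.h>
  #include <stdint.h>

  // These two calls address the same memory: an "_index" of i equals
  // a byte "_offset" of i * 4 for the 32-bit store container.
  void same_address(svbool_t pg, svuint64_t bases, int64_t i,
                    svuint64_t data) {
    svstnt1w_scatter_u64base_index_u64(pg, bases, i, data);
    svstnt1w_scatter_u64base_offset_u64(pg, bases, i * 4, data);
  }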

diff  --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 1e01f611bfa2..a32345e6a9d5 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -481,6 +481,11 @@ void SVEType::applyModifier(char Mod) {
     Signed = false;
     Float = false;
     break;
+  case 'x':
+    Predicate = false;
+    Signed = true;
+    Float = false;
+    break;
   case 'i':
     Predicate = false;
     Float = false;
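
The new 'x' case clears Predicate and Float and sets Signed, i.e. the
modifier turns the prototype operand into a signed integer vector. Our
reading, hedged since the matching arm_sve.td changes are not quoted
here, is that this is what lets the gather/scatter prototypes take
svint64_t offset and index vectors alongside the unsigned forms, as in
the s64offset/s64index tests above. A minimal sketch of a call through
such a signed-offset builtin (helper name illustrative):

  #include <arm_sve.h>
  #include <stdint.h>

  // Non-temporal truncating scatter with a signed byte-offset vector:
  // the low 32 bits of each active 64-bit lane of data are stored to
  // (char *)base + offs[i].
  void scatter_signed(svbool_t pg, int32_t *base, svint64_t offs,
                      svint64_t data) {
    svstnt1w_scatter_s64offset_s64(pg, base, offs, data);
  }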


        

