[clang] 14e7dac - [Clang][LLVM][AArch64] SVE2.1: update the intrinsics according to the ACLE [1] (#76844)

CarolineConcatto via cfe-commits <cfe-commits at lists.llvm.org>
Wed Jan 10 09:12:19 PST 2024


Author: CarolineConcatto
Date: 2024-01-10T17:12:14Z
New Revision: 14e7dac92a32f900a66cb868be89c964b687a825

URL: https://github.com/llvm/llvm-project/commit/14e7dac92a32f900a66cb868be89c964b687a825
DIFF: https://github.com/llvm/llvm-project/commit/14e7dac92a32f900a66cb868be89c964b687a825.diff

LOG: [Clang][LLVM][AArch64] SVE2.1: update the intrinsics according to the ACLE [1] (#76844)

This patch changes the following intrinsics:

```
svst1uwq[_{d}]       replaced by svst1wq[_{d}]
svst1uwq_vnum[_{d}]  replaced by svst1wq_vnum[_{d}]
svst1udq[_{d}]       replaced by svst1dq[_{d}]
svst1udq_vnum[_{d}]  replaced by svst1dq_vnum[_{d}]
```

The 'u' is dropped from the quadword stores because they simply truncate the
quadwords to 32 bits (svst1wq) or 64 bits (svst1dq), so there is no
zero-extension for the 'u' to denote.
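
As a minimal usage sketch of the renamed stores (illustrative only, not part
of the patch; assumes a compiler with the SVE2.1 ACLE enabled, e.g. via
`-march=armv9-a+sve2p1`):

```c
#include <arm_sve.h>

// Truncating quadword stores under their new names; the old spellings
// (svst1uwq_*, svst1udq_*) are noted in the comments.
void store_quadwords(svbool_t pg, uint32_t const *base32, svuint32_t z32,
                     uint64_t const *base64, svuint64_t z64, int64_t vnum) {
  svst1wq_u32(pg, base32, z32);             // was svst1uwq_u32
  svst1wq_vnum_u32(pg, base32, vnum, z32);  // was svst1uwq_vnum_u32
  svst1dq_u64(pg, base64, z64);             // was svst1udq_u64
  svst1dq_vnum_u64(pg, base64, vnum, z64);  // was svst1udq_vnum_u64
}
```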

```
svextq_lane[_{d}]  replaced by svextq[_{d}]
```
EXTQ follows the previously defined EXT intrinsics.
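
A corresponding sketch for the EXTQ rename (same assumptions as above; the
immediate index is still limited to [0, 15]):

```c
#include <arm_sve.h>

// svextq replaces svextq_lane; the semantics and immediate range are unchanged.
svuint8_t extract_quadword_segment(svuint8_t zn, svuint8_t zm) {
  return svextq_u8(zn, zm, 4);  // was svextq_lane_u8(zn, zm, 4)
}
```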

```
svdot[_{d}_{2}_{3}]  replaced by svdot[_{d}_{2}]
```
This naming was introduced with the latest SME2 ACLE change.
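
And a sketch of the shortened two-way dot-product names (again illustrative;
the duplicated 16-bit operand suffix is simply dropped):

```c
#include <arm_sve.h>

// svdot_s32_s16_s16 becomes svdot_s32_s16; likewise for the _lane forms.
svint32_t dot_two_way(svint32_t acc, svint16_t a, svint16_t b) {
  return svdot_s32_s16(acc, a, b);
}

svint32_t dot_two_way_lane(svint32_t acc, svint16_t a, svint16_t b) {
  return svdot_lane_s32_s16(acc, a, b, 3);  // lane index must be in [0, 3]
}
```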

[1] https://github.com/ARM-software/acle/pull/257

Added: 
    

Modified: 
    clang/include/clang/Basic/arm_sve.td
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dot.c
    clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
    clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1_single.c
    clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-st1-single.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 7f80fb0386cc77..6f35e25617adea 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -454,11 +454,11 @@ let TargetGuard = "sve,bf16" in {
 
 let TargetGuard = "sve2p1" in {
   // Contiguous truncating store from quadword (single vector).
-  def SVST1UWQ      : MInst<"svst1uwq[_{d}]", "vPcd", "iUif",  [IsStore], MemEltTyInt32, "aarch64_sve_st1uwq">;
-  def SVST1UWQ_VNUM : MInst<"svst1uwq_vnum[_{d}]", "vPcld", "iUif", [IsStore], MemEltTyInt32, "aarch64_sve_st1uwq">;
+  def SVST1UWQ      : MInst<"svst1wq[_{d}]", "vPcd", "iUif",  [IsStore], MemEltTyInt32, "aarch64_sve_st1wq">;
+  def SVST1UWQ_VNUM : MInst<"svst1wq_vnum[_{d}]", "vPcld", "iUif", [IsStore], MemEltTyInt32, "aarch64_sve_st1wq">;
 
-  def SVST1UDQ      : MInst<"svst1udq[_{d}]", "vPcd", "lUld",  [IsStore], MemEltTyInt64, "aarch64_sve_st1udq">;
-  def SVST1UDQ_VNUM : MInst<"svst1udq_vnum[_{d}]", "vPcld", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1udq">;
+  def SVST1UDQ      : MInst<"svst1dq[_{d}]", "vPcd", "lUld",  [IsStore], MemEltTyInt64, "aarch64_sve_st1dq">;
+  def SVST1UDQ_VNUM : MInst<"svst1dq_vnum[_{d}]", "vPcld", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1dq">;
 
   // Store one vector (vector base + scalar offset)
   def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]",  "vPgld", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">;
@@ -2040,12 +2040,12 @@ let TargetGuard = "sve2p1|sme2" in {
 }
 
 let TargetGuard = "sve2p1" in {
-def SVDOT_X2_S : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "i",  MergeNone, "aarch64_sve_sdot_x2", [], []>;
-def SVDOT_X2_U : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [], []>;
-def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f",  MergeNone, "aarch64_sve_fdot_x2", [], []>;
-def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i",  MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
-def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
-def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f",  MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
+def SVDOT_X2_S : SInst<"svdot[_{d}_{2}]", "ddhh", "i",  MergeNone, "aarch64_sve_sdot_x2", [], []>;
+def SVDOT_X2_U : SInst<"svdot[_{d}_{2}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [], []>;
+def SVDOT_X2_F : SInst<"svdot[_{d}_{2}]", "ddhh", "f",  MergeNone, "aarch64_sve_fdot_x2", [], []>;
+def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "i",  MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
+def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
+def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "f",  MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 }
 
 let TargetGuard = "sve2p1|sme2" in {
@@ -2208,7 +2208,7 @@ let TargetGuard = "sve2p1" in {
   def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tblq">;
   def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">;
   // EXTQ
-  def EXTQ : SInst<"svextq_lane[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq_lane", [], [ImmCheck<2, ImmCheck0_15>]>;
+  def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheck0_15>]>;
   // PMOV
   // Move to Pred
   multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f71dbf1729a1d6..1ed35befe1361f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9681,8 +9681,8 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
   bool IsQuadStore = false;
 
   switch (IntrinsicID) {
-  case Intrinsic::aarch64_sve_st1uwq:
-  case Intrinsic::aarch64_sve_st1udq:
+  case Intrinsic::aarch64_sve_st1wq:
+  case Intrinsic::aarch64_sve_st1dq:
     AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
     PredTy =
         llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);

diff  --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dot.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dot.c
index d01b59114d5429..035ba244f9441e 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dot.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dot.c
@@ -26,7 +26,7 @@
 //
 svint32_t test_svdot_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3)
 {
-  return SVE_ACLE_FUNC(svdot,_s32_s16_s16,)(op1, op2, op3);
+  return SVE_ACLE_FUNC(svdot,_s32_s16,)(op1, op2, op3);
 }
 
 // CHECK-LABEL: @test_svdot_u32_x2(
@@ -41,7 +41,7 @@ svint32_t test_svdot_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3)
 //
 svuint32_t test_svdot_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3)
 {
-  return SVE_ACLE_FUNC(svdot,_u32_u16_u16,)(op1, op2, op3);
+  return SVE_ACLE_FUNC(svdot,_u32_u16,)(op1, op2, op3);
 }
 
 // CHECK-LABEL: @test_svdot_f32_x2(
@@ -56,7 +56,7 @@ svuint32_t test_svdot_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3)
 //
 svfloat32_t test_svdot_f32_x2(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3)
 {
-  return SVE_ACLE_FUNC(svdot,_f32_f16_f16,)(op1, op2, op3);
+  return SVE_ACLE_FUNC(svdot,_f32_f16,)(op1, op2, op3);
 }
 
 
@@ -73,7 +73,7 @@ svfloat32_t test_svdot_f32_x2(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3)
 //
 svint32_t test_svdot_lane_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3)
 {
-  return SVE_ACLE_FUNC(svdot_lane,_s32_s16_s16,)(op1, op2, op3, 3);
+  return SVE_ACLE_FUNC(svdot_lane,_s32_s16,)(op1, op2, op3, 3);
 }
 
 // CHECK-LABEL: @test_svdot_lane_u32_x2(
@@ -88,7 +88,7 @@ svint32_t test_svdot_lane_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3)
 //
 svuint32_t test_svdot_lane_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3)
 {
-  return SVE_ACLE_FUNC(svdot_lane,_u32_u16_u16,)(op1, op2, op3, 3);
+  return SVE_ACLE_FUNC(svdot_lane,_u32_u16,)(op1, op2, op3, 3);
 }
 
 // CHECK-LABEL: @test_svdot_lane_f32_x2(
@@ -103,5 +103,5 @@ svuint32_t test_svdot_lane_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3
 //
 svfloat32_t test_svdot_lane_f32_x2(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3)
 {
-  return SVE_ACLE_FUNC(svdot_lane,_f32_f16_f16,)(op1, op2, op3, 3);
+  return SVE_ACLE_FUNC(svdot_lane,_f32_f16,)(op1, op2, op3, 3);
 }

diff  --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
index 7704db5667a2a7..738b290b76cf59 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
@@ -20,194 +20,194 @@
 #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
 #endif
 
-// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_lane_u8
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_u8
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_u8u11__SVUint8_tS_
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svextq_u8u11__SVUint8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svextq_lane_u8(svuint8_t zn, svuint8_t zm) {
-  return SVE_ACLE_FUNC(svextq_lane, _u8,,)(zn, zm, 0);
+svuint8_t test_svextq_u8(svuint8_t zn, svuint8_t zm) {
+  return SVE_ACLE_FUNC(svextq, _u8,,)(zn, zm, 0);
 }
 
-// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_lane_s8
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_s8
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_s8u10__SVInt8_tS_
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svextq_s8u10__SVInt8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svextq_lane_s8(svint8_t zn, svint8_t zm) {
-    return SVE_ACLE_FUNC(svextq_lane, _s8,,)(zn, zm, 4);
+svint8_t test_svextq_s8(svint8_t zn, svint8_t zm) {
+    return SVE_ACLE_FUNC(svextq, _s8,,)(zn, zm, 4);
 }
 
-// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_lane_u16
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_u16
 // CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_u16u12__SVUint16_tS_
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svextq_u16u12__SVUint16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svextq_lane_u16(svuint16_t zn, svuint16_t zm) {
-  return SVE_ACLE_FUNC(svextq_lane, _u16,,)(zn, zm, 1);
+svuint16_t test_svextq_u16(svuint16_t zn, svuint16_t zm) {
+  return SVE_ACLE_FUNC(svextq, _u16,,)(zn, zm, 1);
 }
 
-// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_lane_s16
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_s16
 // CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_s16u11__SVInt16_tS_
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svextq_s16u11__SVInt16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svextq_lane_s16(svint16_t zn, svint16_t zm) {
-    return SVE_ACLE_FUNC(svextq_lane, _s16,,)(zn, zm, 5);
+svint16_t test_svextq_s16(svint16_t zn, svint16_t zm) {
+    return SVE_ACLE_FUNC(svextq, _s16,,)(zn, zm, 5);
 }
 
-// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_lane_u32
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_u32
 // CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_u32u12__SVUint32_tS_
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svextq_u32u12__SVUint32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svextq_lane_u32(svuint32_t zn, svuint32_t zm) {
-  return SVE_ACLE_FUNC(svextq_lane, _u32,,)(zn, zm, 2);
+svuint32_t test_svextq_u32(svuint32_t zn, svuint32_t zm) {
+  return SVE_ACLE_FUNC(svextq, _u32,,)(zn, zm, 2);
 }
 
-// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_lane_s32
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_s32
 // CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_s32u11__SVInt32_tS_
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svextq_s32u11__SVInt32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svextq_lane_s32(svint32_t zn, svint32_t zm) {
-    return SVE_ACLE_FUNC(svextq_lane, _s32,,)(zn, zm, 6);
+svint32_t test_svextq_s32(svint32_t zn, svint32_t zm) {
+    return SVE_ACLE_FUNC(svextq, _s32,,)(zn, zm, 6);
 }
 
-// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_lane_u64
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_u64
 // CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_u64u12__SVUint64_tS_
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svextq_u64u12__SVUint64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svextq_lane_u64(svuint64_t zn, svuint64_t zm) {
-  return SVE_ACLE_FUNC(svextq_lane, _u64,,)(zn, zm, 3);
+svuint64_t test_svextq_u64(svuint64_t zn, svuint64_t zm) {
+  return SVE_ACLE_FUNC(svextq, _u64,,)(zn, zm, 3);
 }
 
-// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_lane_s64
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_s64
 // CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_s64u11__SVInt64_tS_
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svextq_s64u11__SVInt64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svextq_lane_s64(svint64_t zn, svint64_t zm) {
-    return SVE_ACLE_FUNC(svextq_lane, _s64,,)(zn, zm, 7);
+svint64_t test_svextq_s64(svint64_t zn, svint64_t zm) {
+    return SVE_ACLE_FUNC(svextq, _s64,,)(zn, zm, 7);
 }
 
-// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svextq_lane_f16
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svextq_f16
 // CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svextq_lane_f16u13__SVFloat16_tS_
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z15test_svextq_f16u13__SVFloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svextq_lane_f16(svfloat16_t zn, svfloat16_t zm) {
-    return SVE_ACLE_FUNC(svextq_lane, _f16,,)(zn, zm, 8);
+svfloat16_t test_svextq_f16(svfloat16_t zn, svfloat16_t zm) {
+    return SVE_ACLE_FUNC(svextq, _f16,,)(zn, zm, 8);
 }
 
-// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svextq_lane_f32
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svextq_f32
 // CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svextq_lane_f32u13__SVFloat32_tS_
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z15test_svextq_f32u13__SVFloat32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svextq_lane_f32(svfloat32_t zn, svfloat32_t zm) {
-    return SVE_ACLE_FUNC(svextq_lane, _f32,,)(zn, zm, 9);
+svfloat32_t test_svextq_f32(svfloat32_t zn, svfloat32_t zm) {
+    return SVE_ACLE_FUNC(svextq, _f32,,)(zn, zm, 9);
 }
 
-// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svextq_lane_f64
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svextq_f64
 // CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svextq_lane_f64u13__SVFloat64_tS_
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z15test_svextq_f64u13__SVFloat64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svextq_lane_f64(svfloat64_t zn, svfloat64_t zm) {
-    return SVE_ACLE_FUNC(svextq_lane, _f64,,)(zn, zm, 10);
+svfloat64_t test_svextq_f64(svfloat64_t zn, svfloat64_t zm) {
+    return SVE_ACLE_FUNC(svextq, _f64,,)(zn, zm, 10);
 }
 
-// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svextq_lane_bf16
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svextq_bf16
 // CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svextq_lane_bf16u14__SVBfloat16_tS_
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svextq_bf16u14__SVBfloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svextq_lane_bf16(svbfloat16_t zn, svbfloat16_t zm) {
-    return SVE_ACLE_FUNC(svextq_lane, _bf16,,)(zn, zm, 11);
+svbfloat16_t test_svextq_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+    return SVE_ACLE_FUNC(svextq, _bf16,,)(zn, zm, 11);
 }

diff  --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1_single.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1_single.c
index 52c16faec7f32b..27f7b8be7f1842 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1_single.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1_single.c
@@ -23,233 +23,233 @@
 
 // ST1W
 
-// CHECK-LABEL: define dso_local void @test_svst1uwq_u32
+// CHECK-LABEL: define dso_local void @test_svst1wq_u32
 // CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[ZT:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: define dso_local void @_Z17test_svst1uwq_u32u10__SVBool_tPKju12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local void @_Z16test_svst1wq_u32u10__SVBool_tPKju12__SVUint32_t
 // CPP-CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[ZT:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1uwq_u32(svbool_t pred, uint32_t const * base, svuint32_t zt) {
-  SVE_ACLE_FUNC(svst1uwq, _u32, , )(pred, base, zt);
+void test_svst1wq_u32(svbool_t pred, uint32_t const * base, svuint32_t zt) {
+  SVE_ACLE_FUNC(svst1wq, _u32, , )(pred, base, zt);
 }
 
-// CHECK-LABEL: define dso_local void @test_svst1uwq_vnum_u32
+// CHECK-LABEL: define dso_local void @test_svst1wq_vnum_u32
 // CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[ZT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 1 x i32>, ptr [[BASE]], i64 1
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: define dso_local void @_Z22test_svst1uwq_vnum_u32u10__SVBool_tPKju12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local void @_Z21test_svst1wq_vnum_u32u10__SVBool_tPKju12__SVUint32_t
 // CPP-CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[ZT:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 1 x i32>, ptr [[BASE]], i64 1
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1uwq_vnum_u32(svbool_t pred, uint32_t const * base, svuint32_t zt) {
-  SVE_ACLE_FUNC(svst1uwq_vnum, _u32, , )(pred, base, 1, zt);
+void test_svst1wq_vnum_u32(svbool_t pred, uint32_t const * base, svuint32_t zt) {
+  SVE_ACLE_FUNC(svst1wq_vnum, _u32, , )(pred, base, 1, zt);
 }
 
-// CHECK-LABEL: define dso_local void @test_svst1uwq_s32
+// CHECK-LABEL: define dso_local void @test_svst1wq_s32
 // CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[ZT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: define dso_local void @_Z17test_svst1uwq_s32u10__SVBool_tPKiu11__SVInt32_t
+// CPP-CHECK-LABEL: define dso_local void @_Z16test_svst1wq_s32u10__SVBool_tPKiu11__SVInt32_t
 // CPP-CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[ZT:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1uwq_s32(svbool_t pred, int32_t const * base, svint32_t zt) {
-  SVE_ACLE_FUNC(svst1uwq, _s32, , )(pred, base, zt);
+void test_svst1wq_s32(svbool_t pred, int32_t const * base, svint32_t zt) {
+  SVE_ACLE_FUNC(svst1wq, _s32, , )(pred, base, zt);
 }
 
-// CHECK-LABEL: define dso_local void @test_svst1uwq_vnum_s32
+// CHECK-LABEL: define dso_local void @test_svst1wq_vnum_s32
 // CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[ZT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 1 x i32>, ptr [[BASE]], i64 1
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: define dso_local void @_Z22test_svst1uwq_vnum_s32u10__SVBool_tPKiu11__SVInt32_t
+// CPP-CHECK-LABEL: define dso_local void @_Z21test_svst1wq_vnum_s32u10__SVBool_tPKiu11__SVInt32_t
 // CPP-CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[ZT:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 1 x i32>, ptr [[BASE]], i64 1
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1uwq_vnum_s32(svbool_t pred, int32_t const * base, svint32_t zt) {
-  SVE_ACLE_FUNC(svst1uwq_vnum, _s32, , )(pred, base, 1, zt);
+void test_svst1wq_vnum_s32(svbool_t pred, int32_t const * base, svint32_t zt) {
+  SVE_ACLE_FUNC(svst1wq_vnum, _s32, , )(pred, base, 1, zt);
 }
 
-// CHECK-LABEL: define dso_local void @test_svst1uwq_f32
+// CHECK-LABEL: define dso_local void @test_svst1wq_f32
 // CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 4 x float> [[ZT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1uwq.nxv4f32(<vscale x 4 x float> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1wq.nxv4f32(<vscale x 4 x float> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: define dso_local void @_Z17test_svst1uwq_f32u10__SVBool_tPKfu13__SVFloat32_t
+// CPP-CHECK-LABEL: define dso_local void @_Z16test_svst1wq_f32u10__SVBool_tPKfu13__SVFloat32_t
 // CPP-CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 4 x float> [[ZT:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1uwq.nxv4f32(<vscale x 4 x float> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1wq.nxv4f32(<vscale x 4 x float> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1uwq_f32(svbool_t pred, float32_t const * base, svfloat32_t zt) {
-  SVE_ACLE_FUNC(svst1uwq, _f32, , )(pred, base, zt);
+void test_svst1wq_f32(svbool_t pred, float32_t const * base, svfloat32_t zt) {
+  SVE_ACLE_FUNC(svst1wq, _f32, , )(pred, base, zt);
 }
 
-// CHECK-LABEL: define dso_local void @test_svst1uwq_vnum_f32
+// CHECK-LABEL: define dso_local void @test_svst1wq_vnum_f32
 // CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 4 x float> [[ZT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 1 x float>, ptr [[BASE]], i64 1
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1uwq.nxv4f32(<vscale x 4 x float> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1wq.nxv4f32(<vscale x 4 x float> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: define dso_local void @_Z22test_svst1uwq_vnum_f32u10__SVBool_tPKfu13__SVFloat32_t
+// CPP-CHECK-LABEL: define dso_local void @_Z21test_svst1wq_vnum_f32u10__SVBool_tPKfu13__SVFloat32_t
 // CPP-CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 4 x float> [[ZT:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 1 x float>, ptr [[BASE]], i64 1
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1uwq.nxv4f32(<vscale x 4 x float> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1wq.nxv4f32(<vscale x 4 x float> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1uwq_vnum_f32(svbool_t pred, float32_t const * base, svfloat32_t zt) {
-  SVE_ACLE_FUNC(svst1uwq_vnum, _f32, , )(pred, base, 1, zt);
+void test_svst1wq_vnum_f32(svbool_t pred, float32_t const * base, svfloat32_t zt) {
+  SVE_ACLE_FUNC(svst1wq_vnum, _f32, , )(pred, base, 1, zt);
 }
 
 
 // ST1D
 
-// CHECK-LABEL: define dso_local void @test_svst1udq_u64
+// CHECK-LABEL: define dso_local void @test_svst1dq_u64
 // CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[ZT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: define dso_local void @_Z17test_svst1udq_u64u10__SVBool_tPKmu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local void @_Z16test_svst1dq_u64u10__SVBool_tPKmu12__SVUint64_t
 // CPP-CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[ZT:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1udq_u64(svbool_t pred, uint64_t const * base, svuint64_t zt) {
-  SVE_ACLE_FUNC(svst1udq, _u64, , )(pred, base, zt);
+void test_svst1dq_u64(svbool_t pred, uint64_t const * base, svuint64_t zt) {
+  SVE_ACLE_FUNC(svst1dq, _u64, , )(pred, base, zt);
 }
 
-// CHECK-LABEL: define dso_local void @test_svst1udq_vnum_u64
+// CHECK-LABEL: define dso_local void @test_svst1dq_vnum_u64
 // CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[ZT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 1 x i64>, ptr [[BASE]], i64 -8
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: define dso_local void @_Z22test_svst1udq_vnum_u64u10__SVBool_tPKmu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local void @_Z21test_svst1dq_vnum_u64u10__SVBool_tPKmu12__SVUint64_t
 // CPP-CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[ZT:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 1 x i64>, ptr [[BASE]], i64 -8
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1udq_vnum_u64(svbool_t pred, uint64_t const * base, svuint64_t zt) {
-  SVE_ACLE_FUNC(svst1udq_vnum, _u64, , )(pred, base, -8, zt);
+void test_svst1dq_vnum_u64(svbool_t pred, uint64_t const * base, svuint64_t zt) {
+  SVE_ACLE_FUNC(svst1dq_vnum, _u64, , )(pred, base, -8, zt);
 }
 
-// CHECK-LABEL: define dso_local void @test_svst1udq_s64
+// CHECK-LABEL: define dso_local void @test_svst1dq_s64
 // CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[ZT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: define dso_local void @_Z17test_svst1udq_s64u10__SVBool_tPKlu11__SVInt64_t
+// CPP-CHECK-LABEL: define dso_local void @_Z16test_svst1dq_s64u10__SVBool_tPKlu11__SVInt64_t
 // CPP-CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[ZT:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1udq_s64(svbool_t pred, int64_t const * base, svint64_t zt) {
-  SVE_ACLE_FUNC(svst1udq, _s64, , )(pred, base, zt);
+void test_svst1dq_s64(svbool_t pred, int64_t const * base, svint64_t zt) {
+  SVE_ACLE_FUNC(svst1dq, _s64, , )(pred, base, zt);
 }
 
-// CHECK-LABEL: define dso_local void @test_svst1udq_vnum_s64
+// CHECK-LABEL: define dso_local void @test_svst1dq_vnum_s64
 // CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[ZT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 1 x i64>, ptr [[BASE]], i64 -8
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: define dso_local void @_Z22test_svst1udq_vnum_s64u10__SVBool_tPKlu11__SVInt64_t
+// CPP-CHECK-LABEL: define dso_local void @_Z21test_svst1dq_vnum_s64u10__SVBool_tPKlu11__SVInt64_t
 // CPP-CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[ZT:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 1 x i64>, ptr [[BASE]], i64 -8
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1udq_vnum_s64(svbool_t pred, int64_t const * base, svint64_t zt) {
-  SVE_ACLE_FUNC(svst1udq_vnum, _s64, , )(pred, base, -8, zt);
+void test_svst1dq_vnum_s64(svbool_t pred, int64_t const * base, svint64_t zt) {
+  SVE_ACLE_FUNC(svst1dq_vnum, _s64, , )(pred, base, -8, zt);
 }
 
-// CHECK-LABEL: define dso_local void @test_svst1udq_f64
+// CHECK-LABEL: define dso_local void @test_svst1dq_f64
 // CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 2 x double> [[ZT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1udq.nxv2f64(<vscale x 2 x double> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1dq.nxv2f64(<vscale x 2 x double> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: define dso_local void @_Z17test_svst1udq_f64u10__SVBool_tPKdu13__SVFloat64_t
+// CPP-CHECK-LABEL: define dso_local void @_Z16test_svst1dq_f64u10__SVBool_tPKdu13__SVFloat64_t
 // CPP-CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 2 x double> [[ZT:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1udq.nxv2f64(<vscale x 2 x double> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1dq.nxv2f64(<vscale x 2 x double> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1udq_f64(svbool_t pred, float64_t const * base, svfloat64_t zt) {
-  SVE_ACLE_FUNC(svst1udq, _f64, , )(pred, base, zt);
+void test_svst1dq_f64(svbool_t pred, float64_t const * base, svfloat64_t zt) {
+  SVE_ACLE_FUNC(svst1dq, _f64, , )(pred, base, zt);
 }
 
-// CHECK-LABEL: define dso_local void @test_svst1udq_vnum_f64
+// CHECK-LABEL: define dso_local void @test_svst1dq_vnum_f64
 // CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 2 x double> [[ZT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 1 x double>, ptr [[BASE]], i64 -8
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1udq.nxv2f64(<vscale x 2 x double> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1dq.nxv2f64(<vscale x 2 x double> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: define dso_local void @_Z22test_svst1udq_vnum_f64u10__SVBool_tPKdu13__SVFloat64_t
+// CPP-CHECK-LABEL: define dso_local void @_Z21test_svst1dq_vnum_f64u10__SVBool_tPKdu13__SVFloat64_t
 // CPP-CHECK-SAME: (<vscale x 16 x i1> [[PRED:%.*]], ptr noundef [[BASE:%.*]], <vscale x 2 x double> [[ZT:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PRED]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 1 x double>, ptr [[BASE]], i64 -8
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1udq.nxv2f64(<vscale x 2 x double> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1dq.nxv2f64(<vscale x 2 x double> [[ZT]], <vscale x 1 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1udq_vnum_f64(svbool_t pred, float64_t const * base, svfloat64_t zt) {
-  SVE_ACLE_FUNC(svst1udq_vnum, _f64, , )(pred, base, -8, zt);
+void test_svst1dq_vnum_f64(svbool_t pred, float64_t const * base, svfloat64_t zt) {
+  SVE_ACLE_FUNC(svst1dq_vnum, _f64, , )(pred, base, -8, zt);
 }

diff  --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
index a3ec4c5b8b1bc7..a6154daadeeab3 100644
--- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
+++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
@@ -114,9 +114,9 @@ void test_cntp(svcount_t c) {
 
 void test_svdot_lane_2way(svint32_t s32, svuint32_t u32, svint16_t s16, svuint16_t u16,
                           svfloat32_t f32, svfloat16_t f16) {
-  svdot_lane_s32_s16_s16(s32, s16, s16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  svdot_lane_u32_u16_u16(u32, u16, u16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  svdot_lane_f32_f16_f16(f32, f16, f16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svdot_lane_s32_s16(s32, s16, s16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svdot_lane_u32_u16(u32, u16, u16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svdot_lane_f32_f16(f32, f16, f16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
 }
 
 
@@ -139,8 +139,8 @@ void test_svbfmul_lane(svbfloat16_t zn, svbfloat16_t zm, uint64_t idx){
 
 __attribute__((target("+sve2p1")))
 void test_svextq_lane(svint16_t zn_i16, svint16_t zm_i16, svfloat16_t zn_f16, svfloat16_t zm_f16){
-  svextq_lane_s16(zn_i16, zm_i16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
-  svextq_lane_f16(zn_f16, zm_f16, 16);  // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  svextq_s16(zn_i16, zm_i16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
+  svextq_f16(zn_f16, zm_f16, 16);  // expected-error {{argument value 16 is outside the valid range [0, 15]}}
 }
 
 __attribute__((target("+sve2p1")))

diff  --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 9088168b4c67b1..acff5c20b1b9a0 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2708,8 +2708,8 @@ class SVE2p1_Single_Store_Quadword
     : DefaultAttrsIntrinsic<[],
                             [llvm_anyvector_ty, llvm_nxv1i1_ty, llvm_ptr_ty],
                             [IntrWriteMem, IntrArgMemOnly]>;
-def int_aarch64_sve_st1uwq : SVE2p1_Single_Store_Quadword;
-def int_aarch64_sve_st1udq : SVE2p1_Single_Store_Quadword;
+def int_aarch64_sve_st1wq : SVE2p1_Single_Store_Quadword;
+def int_aarch64_sve_st1dq : SVE2p1_Single_Store_Quadword;
 
 
 def int_aarch64_sve_ld2q_sret : AdvSIMD_2Vec_PredLoad_Intrinsic;
@@ -3617,7 +3617,7 @@ def int_aarch64_sve_tbxq : AdvSIMD_SVE2_TBX_Intrinsic;
 
 // SVE2.1 - Extract vector segment from each pair of quadword segments.
 //
-def int_aarch64_sve_extq_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_extq : AdvSIMD_2VectorArgIndexed_Intrinsic;
 
 //
 // SVE2.1 - Move predicate to/from vector

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index edc8cc7d4d1e69..ea5679b4d5e39b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -6834,10 +6834,10 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
     return getPackedVectorTypeFromPredicateType(
         Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
   case Intrinsic::aarch64_sve_ld1udq:
-  case Intrinsic::aarch64_sve_st1udq:
+  case Intrinsic::aarch64_sve_st1dq:
     return EVT(MVT::nxv1i64);
   case Intrinsic::aarch64_sve_ld1uwq:
-  case Intrinsic::aarch64_sve_st1uwq:
+  case Intrinsic::aarch64_sve_st1wq:
     return EVT(MVT::nxv1i32);
   }
 }

diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index ee10a7d1c706fc..26102f922d99fe 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1397,17 +1397,17 @@ let Predicates = [HasSVEorSME] in {
                         (RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>;
   }
 
-  // ld1quw/st1quw
+  // ld1quw/st1qw
   defm : sve_ld1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
   defm : sve_ld1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
-  defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
-  defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+  defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1wq,  ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+  defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1wq,  ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
 
-  // ld1qud/st1qud
+  // ld1qud/st1qd
   defm : sve_ld1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
   defm : sve_ld1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
-  defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
-  defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+  defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1dq,  ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+  defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1dq,  ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
 
 } // End HasSVEorSME
 
@@ -4095,7 +4095,7 @@ defm FMAXQV   : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>;
 defm FMINQV   : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>;
 
 defm DUPQ_ZZI : sve2p1_dupq<"dupq">;
-defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq_lane>;
+defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>;
 
 defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>;
 defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov", int_aarch64_sve_pmov_to_vector_lane_merging, int_aarch64_sve_pmov_to_vector_lane_zeroing>;

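The matching ACLE-level rename drops the _lane suffix so that EXTQ follows the existing svext naming. A hedged sketch of the user-facing form (the byte-index semantics follow the intrinsic comment above, and the [0, 15] immediate range matches the extq.ll tests below):

```
// Sketch only: svextq replaces svextq_lane.  The last operand is a
// constant byte index in [0, 15] selecting where the extraction
// starts within each pair of quadword segments.
#include <arm_sve.h>

svuint8_t extract_segments(svuint8_t zn, svuint8_t zm) {
  return svextq_u8(zn, zm, 3); // was: svextq_lane_u8(zn, zm, 3)
}
```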
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
index efe19432f9c32e..a49aa7cfcf8a2d 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
@@ -6,7 +6,7 @@ define <vscale x 16 x i8> @test_extq_i8 (<vscale x 16 x i8> %zn, <vscale x 16 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #0
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32 0)
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32 0)
   ret <vscale x 16 x i8> %res
 }
 
@@ -15,7 +15,7 @@ define <vscale x 8 x i16> @test_extq_i16 (<vscale x 8 x i16> %zn, <vscale x 8 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #1
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32 1)
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.extq.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32 1)
   ret <vscale x 8 x i16> %res
 }
 
@@ -24,7 +24,7 @@ define <vscale x 4 x i32> @test_extq_i32 (<vscale x 4 x i32> %zn, <vscale x 4 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #2
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm, i32 2)
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm, i32 2)
   ret <vscale x 4 x i32> %res
 }
 
@@ -33,7 +33,7 @@ define <vscale x 2 x i64> @test_extq_i64 (<vscale x 2 x i64> %zn, <vscale x 2 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #3
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm, i32 3)
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm, i32 3)
   ret <vscale x 2 x i64> %res
 }
 
@@ -42,7 +42,7 @@ define <vscale x 8 x half> @test_extq_f16(<vscale x 8 x half> %zn, <vscale x 8 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #4
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm, i32 4)
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm, i32 4)
   ret <vscale x 8 x half> %res
 }
 
@@ -51,7 +51,7 @@ define <vscale x 4 x float> @test_extq_f32(<vscale x 4 x float> %zn, <vscale x 4
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #5
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm, i32 5)
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm, i32 5)
   ret <vscale x 4 x float> %res
 }
 
@@ -60,7 +60,7 @@ define <vscale x 2 x double> @test_extq_f64(<vscale x 2 x double> %zn, <vscale x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #6
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm, i32 6)
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm, i32 6)
   ret <vscale x 2 x double> %res
 }
 
@@ -69,15 +69,15 @@ define <vscale x 8 x bfloat> @test_extq_bf16(<vscale x 8 x bfloat> %zn, <vscale
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #15
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 15)
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 15)
   ret <vscale x 8 x bfloat> %res
 }
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
-declare <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
-declare <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
-declare <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
-declare <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.extq.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+declare <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)

diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-st1-single.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-st1-single.ll
index e93673c79c30a3..894c647453f54b 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-st1-single.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-st1-single.ll
@@ -3,128 +3,128 @@
 
 ; ST1W
 
-define void @test_svst1uwq_i32_ss(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
-; CHECK-LABEL: test_svst1uwq_i32_ss:
+define void @test_svst1wq_i32_ss(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
+; CHECK-LABEL: test_svst1wq_i32_ss:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    st1w { z0.q }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %gep = getelementptr i32, ptr %base, i64 %offset
-  call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %gep)
+  call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %gep)
   ret void
 }
 
-define void @test_svst1uwq_i32_si(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x i32>* %base) {
-; CHECK-LABEL: test_svst1uwq_i32_si:
+define void @test_svst1wq_i32_si(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x i32>* %base) {
+; CHECK-LABEL: test_svst1wq_i32_si:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    st1w { z0.q }, p0, [x0, #-8, mul vl]
 ; CHECK-NEXT:    st1w { z0.q }, p0, [x0, #7, mul vl]
 ; CHECK-NEXT:    ret
   %gep1 = getelementptr inbounds <vscale x 1 x i32>, <vscale x 1 x i32>* %base, i64 -8
-  call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %gep1)
+  call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %gep1)
 
   %gep2 = getelementptr inbounds <vscale x 1 x i32>, <vscale x 1 x i32>* %base, i64 7
-  call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %gep2)
+  call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %gep2)
   ret void
 }
 
-define void @test_svst1uwq_i32_out_of_bound(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x i32>* %base) {
-; CHECK-LABEL: test_svst1uwq_i32_out_of_bound:
+define void @test_svst1wq_i32_out_of_bound(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x i32>* %base) {
+; CHECK-LABEL: test_svst1wq_i32_out_of_bound:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #2
 ; CHECK-NEXT:    st1w { z0.q }, p0, [x8]
 ; CHECK-NEXT:    ret
   %gep = getelementptr inbounds <vscale x 1 x i32>, <vscale x 1 x i32>* %base, i64 8
-  call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %gep)
+  call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %gep)
   ret void
 }
 
-define void @test_svst1uwq_f32_ss(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
-; CHECK-LABEL: test_svst1uwq_f32_ss:
+define void @test_svst1wq_f32_ss(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
+; CHECK-LABEL: test_svst1wq_f32_ss:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    st1w { z0.q }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %gep = getelementptr float, ptr %base, i64 %offset
-  call void @llvm.aarch64.sve.st1uwq.nxv4f32(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, ptr %gep)
+  call void @llvm.aarch64.sve.st1wq.nxv4f32(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, ptr %gep)
   ret void
 }
 
-define void @test_svst1uwq_f32_si(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x float>* %base) {
-; CHECK-LABEL: test_svst1uwq_f32_si:
+define void @test_svst1wq_f32_si(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x float>* %base) {
+; CHECK-LABEL: test_svst1wq_f32_si:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    st1w { z0.q }, p0, [x0, #-8, mul vl]
 ; CHECK-NEXT:    st1w { z0.q }, p0, [x0, #7, mul vl]
 ; CHECK-NEXT:    ret
   %gep1 = getelementptr inbounds <vscale x 1 x float>, <vscale x 1 x float>* %base, i64 -8
-  call void @llvm.aarch64.sve.st1uwq.nxv4f32(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, ptr %gep1)
+  call void @llvm.aarch64.sve.st1wq.nxv4f32(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, ptr %gep1)
 
   %gep2 = getelementptr inbounds <vscale x 1 x float>, <vscale x 1 x float>* %base, i64 7
-  call void @llvm.aarch64.sve.st1uwq.nxv4f32(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, ptr %gep2)
+  call void @llvm.aarch64.sve.st1wq.nxv4f32(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, ptr %gep2)
   ret void
 }
 
 ; ST1D
 
-define void @test_svst1udq_i64_ss(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
-; CHECK-LABEL: test_svst1udq_i64_ss:
+define void @test_svst1dq_i64_ss(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
+; CHECK-LABEL: test_svst1dq_i64_ss:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    st1d { z0.q }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %gep = getelementptr i64, ptr  %base, i64 %offset
-  call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %gep)
+  call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %gep)
   ret void
 }
 
-define void @test_svst1udq_i64_si(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x i64>* %base) {
-; CHECK-LABEL: test_svst1udq_i64_si:
+define void @test_svst1dq_i64_si(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x i64>* %base) {
+; CHECK-LABEL: test_svst1dq_i64_si:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    st1d { z0.q }, p0, [x0, #-8, mul vl]
 ; CHECK-NEXT:    st1d { z0.q }, p0, [x0, #7, mul vl]
 ; CHECK-NEXT:    ret
   %gep1 = getelementptr inbounds <vscale x 1 x i64>, <vscale x 1 x i64>* %base, i64 -8
-  call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %gep1)
+  call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %gep1)
 
   %gep2 = getelementptr inbounds <vscale x 1 x i64>, <vscale x 1 x i64>* %base, i64 7
-  call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %gep2)
+  call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %gep2)
   ret void
 }
 
-define void @test_svst1udq_i64_out_of_bound(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x i64>* %base) {
-; CHECK-LABEL: test_svst1udq_i64_out_of_bound:
+define void @test_svst1dq_i64_out_of_bound(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x i64>* %base) {
+; CHECK-LABEL: test_svst1dq_i64_out_of_bound:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addvl x8, x0, #-5
 ; CHECK-NEXT:    st1d { z0.q }, p0, [x8]
 ; CHECK-NEXT:    ret
   %gep = getelementptr inbounds <vscale x 1 x i64>, <vscale x 1 x i64>* %base, i64 -10
-  call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %gep)
+  call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %gep)
   ret void
 }
 
-define void @test_svst1udq_f64_ss(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
-; CHECK-LABEL: test_svst1udq_f64_ss:
+define void @test_svst1dq_f64_ss(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
+; CHECK-LABEL: test_svst1dq_f64_ss:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    st1d { z0.q }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %gep = getelementptr double, ptr  %base, i64 %offset
-  call void @llvm.aarch64.sve.st1udq.nxv2f64(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, ptr %gep)
+  call void @llvm.aarch64.sve.st1dq.nxv2f64(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, ptr %gep)
   ret void
 }
 
-define void @test_svst1udq_f64_si(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x double>* %base) {
-; CHECK-LABEL: test_svst1udq_f64_si:
+define void @test_svst1dq_f64_si(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x double>* %base) {
+; CHECK-LABEL: test_svst1dq_f64_si:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    st1d { z0.q }, p0, [x0, #-8, mul vl]
 ; CHECK-NEXT:    st1d { z0.q }, p0, [x0, #7, mul vl]
 ; CHECK-NEXT:    ret
   %gep1 = getelementptr inbounds <vscale x 1 x double>, <vscale x 1 x double>* %base, i64 -8
-  call void @llvm.aarch64.sve.st1udq.nxv2f64(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, ptr %gep1)
+  call void @llvm.aarch64.sve.st1dq.nxv2f64(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, ptr %gep1)
 
   %gep2 = getelementptr inbounds <vscale x 1 x double>, <vscale x 1 x double>* %base, i64 7
-  call void @llvm.aarch64.sve.st1udq.nxv2f64(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, ptr %gep2)
+  call void @llvm.aarch64.sve.st1dq.nxv2f64(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, ptr %gep2)
   ret void
 }
 
-declare void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32>, <vscale x 1 x i1>, ptr)
-declare void @llvm.aarch64.sve.st1uwq.nxv4f32(<vscale x 4 x float>, <vscale x 1 x i1>, ptr)
+declare void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32>, <vscale x 1 x i1>, ptr)
+declare void @llvm.aarch64.sve.st1wq.nxv4f32(<vscale x 4 x float>, <vscale x 1 x i1>, ptr)
 
-declare void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64>, <vscale x 1 x i1>, ptr)
-declare void @llvm.aarch64.sve.st1udq.nxv2f64(<vscale x 2 x double>, <vscale x 1 x i1>, ptr)
+declare void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64>, <vscale x 1 x i1>, ptr)
+declare void @llvm.aarch64.sve.st1dq.nxv2f64(<vscale x 2 x double>, <vscale x 1 x i1>, ptr)

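The _si and out_of_bound tests above pin down the addressing behaviour: vector-length-scaled immediate offsets in [-8, 7] fold into the st1w/st1d quadword form's 'mul vl' addressing mode, while anything outside that range is materialized through a separate addvl. A C-level sketch of the same three cases, under the same assumed prototypes as earlier:

```
// Sketch only: vnum values in [-8, 7] are expected to map onto the
// immediate 'mul vl' form exercised above; vnum outside that range
// needs a separate addvl, as in the out_of_bound tests.
#include <arm_sve.h>

void store_vnum(svbool_t pg, int32_t *base, svint32_t zt) {
  svst1wq_vnum_s32(pg, base, -8, zt); // in range: immediate form
  svst1wq_vnum_s32(pg, base, 7, zt);  // in range: immediate form
  svst1wq_vnum_s32(pg, base, 8, zt);  // out of range: addvl + register form
}
```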
