[clang] [llvm] [SVE2.1][Clang][LLVM]Add 128bits builtin in Clang and LLVM intrinsic (PR #71930)

via cfe-commits cfe-commits at lists.llvm.org
Mon Nov 20 09:47:58 PST 2023


https://github.com/CarolineConcatto updated https://github.com/llvm/llvm-project/pull/71930

>From 0bf30aec802f6fe3d6cd74b16d00cb8db0d3c1b6 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Fri, 10 Nov 2023 11:00:49 +0000
Subject: [PATCH 1/4] [SVE2.1][Clang][LLVM]Add 128bits builtin in Clang and
 LLVM intrinsic

This patch implements the builtins in Clang
and the LLVM-IR intrinsics for the following:

EXTQ
// Variants are also available for:
// _s8, _s16, _u16, _s32, _u32, _s64, _u64
// _bf16, _f16, _f32, _f64
svuint8_t svextq_lane[_u8](svuint8_t zdn, svuint8_t zm, uint64_t imm);

TBLQ and TBXQ
// Variants are also available for:
// _u8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
svint8_t svtblq[_s8](svint8_t zn, svuint8_t zm);
svint8_t svtbxq[_s8](svint8_t fallback, svint8_t zn, svuint8_t zm);

UZPQ1, UZPQ2, ZIPQ1 and ZIPQ2
// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
svuint8_t svuzpq1[_u8](svuint8_t zn, svuint8_t zm);
svuint8_t svuzpq2[_u8](svuint8_t zn, svuint8_t zm);
svuint8_t svzipq1[_u8](svuint8_t zn, svuint8_t zm);
svuint8_t svzipq2[_u8](svuint8_t zn, svuint8_t zm);

PMOV
// Variants are available for:
// _s8, _u16, _s16, _s32, _u32, _s64, _u64
svbool_t svpmov_lane[_u8](svuint8_t zn, uint64_t imm);
svbool_t svpmov[_u8](svuint8_t zn); // The immediate is zero
svuint8_t svpmov_u8_z(svbool_t pn); // The immediate is zero

// Variants are available for:
// _s16, _s32, _u32, _s64, _u64
svuint16_t svpmov_lane[_u16]_m(svuint16_t zd, svbool_t pn, uint64_t imm);

According to ACLE PR #257 [1].
[1] ARM-software/acle#257 (https://github.com/ARM-software/acle/pull/257)

Co-authored-by: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
---
 clang/include/clang/Basic/arm_sve.td          |  33 ++
 clang/include/clang/Basic/arm_sve_sme_incl.td |   3 +
 clang/lib/Sema/SemaChecking.cpp               |  12 +
 .../acle_sve2p1_extq.c                        | 213 ++++++++++++
 .../acle_sve2p1_pmov_to_pred.c                | 304 ++++++++++++++++++
 .../acle_sve2p1_pmov_to_vector.c              | 276 ++++++++++++++++
 .../acle_sve2p1_tblq.c                        | 214 ++++++++++++
 .../acle_sve2p1_tbxq.c                        | 214 ++++++++++++
 .../acle_sve2p1_uzpq1.c                       | 217 +++++++++++++
 .../acle_sve2p1_uzpq2.c                       | 216 +++++++++++++
 .../acle_sve2p1_zipq1.c                       | 217 +++++++++++++
 .../acle_sve2p1_zipq2.c                       | 217 +++++++++++++
 .../acle_sve2p1_imm.cpp                       |   6 +
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  41 +++
 .../lib/Target/AArch64/AArch64InstrFormats.td |  33 ++
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  18 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  86 ++++-
 .../CodeGen/AArch64/sve2p1-intrinsics-extq.ll |  83 +++++
 .../AArch64/sve2p1-intrinsics-pmov-to-pred.ll | 121 +++++++
 .../sve2p1-intrinsics-pmov-to-vector.ll       | 117 +++++++
 .../CodeGen/AArch64/sve2p1-intrinsics-tblq.ll |  83 +++++
 .../CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll |  83 +++++
 .../AArch64/sve2p1-intrinsics-uzpq1.ll        |  85 +++++
 .../AArch64/sve2p1-intrinsics-uzpq2.ll        |  85 +++++
 .../AArch64/sve2p1-intrinsics-zipq1.ll        |  85 +++++
 .../AArch64/sve2p1-intrinsics-zipq2.ll        |  85 +++++
 26 files changed, 3132 insertions(+), 15 deletions(-)
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-vector.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 3d4c2129565903d..c377a0b89c1d591 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1992,3 +1992,36 @@ let TargetGuard = "sme2" in {
   def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
   def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
 }
+
+let TargetGuard = "sve2p1" in {
+  // ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
+  def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq1", [], []>;
+  def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq2", [], []>;
+  def SVUZPQ1 : SInst<"svuzpq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq1", [], []>;
+  def SVUZPQ2 : SInst<"svuzpq2[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq2", [], []>;
+  // TBLQ, TBXQ
+  def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tblq">;
+  def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">;
+  // EXTQ
+  def EXTQ : SInst<"svextq_lane[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq_lane", [], [ImmCheck<2, ImmCheck0_15>]>;
+  // PMOV
+  // Move to Pred
+  multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
+    def _LANE : SInst<name # "_lane[_{d}]", "Pdk", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
+    def _ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>;
+  }
+  defm SVPMOV_B_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_0>;
+  defm SVPMOV_H_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "sUs", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_1>;
+  defm SVPMOV_S_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "iUi", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_3>;
+  defm SVPMOV_D_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "lUl", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_7>;
+
+  // Move to Vector
+  multiclass PMOV_TO_VEC<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
+    def _M : SInst<name # "_lane[_{d}]", "ddPk", types, MergeOp1, intrinsic # "_merging", flags, [ImmCheck<2, immCh>]>;
+    def _Z : SInst<name # "_{d}_z", "dP",  types, MergeNone, intrinsic # "_zeroing", flags, []>;
+  }
+  def SVPMOV_TO_VEC_LANE_B : SInst<"svpmov_{d}_z", "dP",  "cUc", MergeNone, "aarch64_sve_pmov_to_vector_lane_zeroing", [], []>;
+  defm SVPMOV_TO_VEC_LANE_H : PMOV_TO_VEC<"svpmov", "sUs", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_1>;
+  defm SVPMOV_TO_VEC_LANE_S : PMOV_TO_VEC<"svpmov", "iUi", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_3>;
+  defm SVPMOV_TO_VEC_LANE_D : PMOV_TO_VEC<"svpmov", "lUl", "aarch64_sve_pmov_to_vector_lane" ,[], ImmCheck1_7>;
+}
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 22a2a3c5434d657..21dac067ab66e61 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -249,6 +249,9 @@ def ImmCheck0_0                 : ImmCheckType<16>; // 0..0
 def ImmCheck0_15                : ImmCheckType<17>; // 0..15
 def ImmCheck0_255               : ImmCheckType<18>; // 0..255
 def ImmCheck2_4_Mul2            : ImmCheckType<19>; // 2, 4
+def ImmCheck1_1                 : ImmCheckType<20>; // 1..1
+def ImmCheck1_3                 : ImmCheckType<21>; // 1..3
+def ImmCheck1_7                 : ImmCheckType<22>; // 1..7
 
 class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
   int Arg = arg;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ae588db02bbe722..9dfff132cd88db3 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3052,6 +3052,18 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
         HasError = true;
       break;
+    case SVETypeFlags::ImmCheck1_1:
+      if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
+        HasError = true;
+      break;
+    case SVETypeFlags::ImmCheck1_3:
+      if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
+        HasError = true;
+      break;
+    case SVETypeFlags::ImmCheck1_7:
+      if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
+        HasError = true;
+      break;
     case SVETypeFlags::ImmCheckExtract:
       if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
                                       (2048 / ElementSizeInBits) - 1))
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
new file mode 100644
index 000000000000000..c49f8c838ace373
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
@@ -0,0 +1,213 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_lane_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svextq_lane_u8(svuint8_t zn, svuint8_t zm) {
+  return SVE_ACLE_FUNC(svextq_lane, _u8,,)(zn, zm, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_lane_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svextq_lane_s8(svint8_t zn, svint8_t zm) {
+    return SVE_ACLE_FUNC(svextq_lane, _s8,,)(zn, zm, 4);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_lane_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svextq_lane_u16(svuint16_t zn, svuint16_t zm) {
+  return SVE_ACLE_FUNC(svextq_lane, _u16,,)(zn, zm, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_lane_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svextq_lane_s16(svint16_t zn, svint16_t zm) {
+    return SVE_ACLE_FUNC(svextq_lane, _s16,,)(zn, zm, 5);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_lane_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svextq_lane_u32(svuint32_t zn, svuint32_t zm) {
+  return SVE_ACLE_FUNC(svextq_lane, _u32,,)(zn, zm, 2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_lane_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svextq_lane_s32(svint32_t zn, svint32_t zm) {
+    return SVE_ACLE_FUNC(svextq_lane, _s32,,)(zn, zm, 6);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_lane_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svextq_lane_u64(svuint64_t zn, svuint64_t zm) {
+  return SVE_ACLE_FUNC(svextq_lane, _u64,,)(zn, zm, 3);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_lane_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svextq_lane_s64(svint64_t zn, svint64_t zm) {
+    return SVE_ACLE_FUNC(svextq_lane, _s64,,)(zn, zm, 7);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svextq_lane_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svextq_lane_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svextq_lane_f16(svfloat16_t zn, svfloat16_t zm) {
+    return SVE_ACLE_FUNC(svextq_lane, _f16,,)(zn, zm, 8);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svextq_lane_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svextq_lane_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svextq_lane_f32(svfloat32_t zn, svfloat32_t zm) {
+    return SVE_ACLE_FUNC(svextq_lane, _f32,,)(zn, zm, 9);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svextq_lane_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svextq_lane_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svextq_lane_f64(svfloat64_t zn, svfloat64_t zm) {
+    return SVE_ACLE_FUNC(svextq_lane, _f64,,)(zn, zm, 10);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svextq_lane_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svextq_lane_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svextq_lane_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+    return SVE_ACLE_FUNC(svextq_lane, _bf16,,)(zn, zm, 11);
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
new file mode 100644
index 000000000000000..84f058ad8c16d35
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
@@ -0,0 +1,304 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z19test_svpmov_lane_u8u11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
+//
+svbool_t test_svpmov_lane_u8(svuint8_t zn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _u8)(zn, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z19test_svpmov_lane_s8u10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
+//
+svbool_t test_svpmov_lane_s8(svint8_t zn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _s8)(zn, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z20test_svpmov_lane_u16u12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_lane_u16(svuint16_t zn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _u16)(zn, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z20test_svpmov_lane_s16u11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_lane_s16(svint16_t zn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _s16)(zn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z20test_svpmov_lane_u32u12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_lane_u32(svuint32_t zn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _u32)(zn, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z20test_svpmov_lane_s32u11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_lane_s32(svint32_t zn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _s32)(zn, 3);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z20test_svpmov_lane_u64u12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_lane_u64(svuint64_t zn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _u64)(zn, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 7)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z20test_svpmov_lane_s64u11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 7)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_lane_s64(svint64_t zn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _s64)(zn, 7);
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv16i8(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z14test_svpmov_u8u11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv16i8(<vscale x 16 x i8> [[ZN]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
+//
+svbool_t test_svpmov_u8(svuint8_t zn) {
+  return SVE_ACLE_FUNC(svpmov, _u8)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv16i8(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z14test_svpmov_s8u10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv16i8(<vscale x 16 x i8> [[ZN]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
+//
+svbool_t test_svpmov_s8(svint8_t zn) {
+  return SVE_ACLE_FUNC(svpmov, _s8)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv8i16(<vscale x 8 x i16> [[ZN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z15test_svpmov_u16u12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv8i16(<vscale x 8 x i16> [[ZN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_u16(svuint16_t zn) {
+  return SVE_ACLE_FUNC(svpmov, _u16)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv8i16(<vscale x 8 x i16> [[ZN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z15test_svpmov_s16u11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv8i16(<vscale x 8 x i16> [[ZN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_s16(svint16_t zn) {
+  return SVE_ACLE_FUNC(svpmov, _s16)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv4i32(<vscale x 4 x i32> [[ZN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z15test_svpmov_u32u12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv4i32(<vscale x 4 x i32> [[ZN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_u32(svuint32_t zn) {
+  return SVE_ACLE_FUNC(svpmov, _u32)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv4i32(<vscale x 4 x i32> [[ZN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z15test_svpmov_s32u11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv4i32(<vscale x 4 x i32> [[ZN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_s32(svint32_t zn) {
+  return SVE_ACLE_FUNC(svpmov, _s32)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv2i64(<vscale x 2 x i64> [[ZN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z15test_svpmov_u64u12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv2i64(<vscale x 2 x i64> [[ZN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_u64(svuint64_t zn) {
+  return SVE_ACLE_FUNC(svpmov, _u64)(zn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv2i64(<vscale x 2 x i64> [[ZN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i1> @_Z15test_svpmov_s64u11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv2i64(<vscale x 2 x i64> [[ZN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpmov_s64(svint64_t zn) {
+  return SVE_ACLE_FUNC(svpmov, _s64)(zn);
+}
+
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c
new file mode 100644
index 000000000000000..1e45f1ecedce55f
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c
@@ -0,0 +1,276 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// Overloaded forms drop the type suffix: A2 is ignored, so SVE_ACLE_FUNC(svpmov_lane, _u16, _m) expands to svpmov_lane_m.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3) A1##A2##A3
+#endif
+
+// _m: merging forms, svpmov_lane[_<type>]_m(zn, pn, imm).
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svpmov_lane_u16_m
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i1> [[TMP0]], i32 1)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z22test_svpmov_lane_u16_mu12__SVUint16_tu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i1> [[TMP0]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svpmov_lane_u16_m(svuint16_t zn, svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _u16, _m)(zn, pn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svpmov_lane_s16_m
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i1> [[TMP0]], i32 1)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z22test_svpmov_lane_s16_mu11__SVInt16_tu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i1> [[TMP0]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svpmov_lane_s16_m(svint16_t zn, svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _s16, _m)(zn, pn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svpmov_lane_u32_m
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i1> [[TMP0]], i32 1)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z22test_svpmov_lane_u32_mu12__SVUint32_tu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i1> [[TMP0]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svpmov_lane_u32_m(svuint32_t zn, svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _u32, _m)(zn, pn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svpmov_lane_s32_m
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i1> [[TMP0]], i32 3)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z22test_svpmov_lane_s32_mu11__SVInt32_tu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i1> [[TMP0]], i32 3)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svpmov_lane_s32_m(svint32_t zn, svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _s32, _m)(zn, pn, 3);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svpmov_lane_u64_m
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i1> [[TMP0]], i32 1)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z22test_svpmov_lane_u64_mu12__SVUint64_tu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i1> [[TMP0]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svpmov_lane_u64_m(svuint64_t zn, svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _u64, _m)(zn, pn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svpmov_lane_s64_m
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i1> [[TMP0]], i32 7)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z22test_svpmov_lane_s64_mu11__SVInt64_tu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i1> [[TMP0]], i32 7)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svpmov_lane_s64_m(svint64_t zn, svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_lane, _s64, _m)(zn, pn, 7);
+}
+
+
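+// _z: zeroing forms, svpmov_<type>_z(pn); the tests keep "_lane" in their names but no lane immediate is passed.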
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svpmov_lane_u8_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z21test_svpmov_lane_u8_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svpmov_lane_u8_z(svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_u8, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svpmov_lane_s8_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z21test_svpmov_lane_s8_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svpmov_lane_s8_z(svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_s8, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svpmov_lane_u16_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z22test_svpmov_lane_u16_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svpmov_lane_u16_z(svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_u16, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svpmov_lane_s16_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z22test_svpmov_lane_s16_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svpmov_lane_s16_z(svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_s16, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svpmov_lane_u32_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z22test_svpmov_lane_u32_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svpmov_lane_u32_z(svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_u32, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svpmov_lane_s32_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z22test_svpmov_lane_s32_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svpmov_lane_s32_z(svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_s32, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svpmov_lane_u64_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z22test_svpmov_lane_u64_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svpmov_lane_u64_z(svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_u64, , _z)(pn);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svpmov_lane_s64_z
+// CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z22test_svpmov_lane_s64_zu10__SVBool_t
+// CPP-CHECK-SAME: (<vscale x 16 x i1> [[PN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PN]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svpmov_lane_s64_z(svbool_t pn) {
+  return SVE_ACLE_FUNC(svpmov_s64, , _z)(pn);
+}
+
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c
new file mode 100644
index 000000000000000..56e95d1abace317
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c
@@ -0,0 +1,214 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// Name-pasting helper taking four pieces; the overloaded form keeps only A1 and A3.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else // Explicit form: paste all four name pieces together.
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif // SVE_OVERLOADED_FORMS
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svtblq_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtblq_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svtblq_u8(svuint8_t zn, svuint8_t zm) {
+  return SVE_ACLE_FUNC(svtblq, _u8,,)(zn, zm); // svtblq_u8, or svtblq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svtblq_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtblq_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svtblq_u16(svuint16_t zn, svuint16_t zm) {
+  return SVE_ACLE_FUNC(svtblq, _u16,,)(zn, zm); // svtblq_u16, or svtblq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svtblq_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtblq_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svtblq_u32(svuint32_t zn, svuint32_t zm) {
+  return SVE_ACLE_FUNC(svtblq, _u32,,)(zn, zm); // svtblq_u32, or svtblq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svtblq_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtblq_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svtblq_u64(svuint64_t zn, svuint64_t zm) {
+  return SVE_ACLE_FUNC(svtblq, _u64,,)(zn, zm); // svtblq_u64, or svtblq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svtblq_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtblq_s8u10__SVInt8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svtblq_s8(svint8_t zn, svuint8_t zm) {
+  return SVE_ACLE_FUNC(svtblq, _s8,,)(zn, zm); // svtblq_s8, or svtblq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svtblq_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtblq_s16u11__SVInt16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svtblq_s16(svint16_t zn, svuint16_t zm) {
+  return SVE_ACLE_FUNC(svtblq, _s16,,)(zn, zm); // svtblq_s16, or svtblq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svtblq_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtblq_s32u11__SVInt32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svtblq_s32(svint32_t zn, svuint32_t zm) {
+  return SVE_ACLE_FUNC(svtblq, _s32,,)(zn, zm); // svtblq_s32, or svtblq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svtblq_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtblq_s64u11__SVInt64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svtblq_s64(svint64_t zn, svuint64_t zm) {
+  return SVE_ACLE_FUNC(svtblq, _s64,,)(zn, zm); // svtblq_s64, or svtblq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svtblq_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tblq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z15test_svtblq_f16u13__SVFloat16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tblq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svtblq_f16(svfloat16_t zn, svuint16_t zm) {
+  return SVE_ACLE_FUNC(svtblq, _f16,,)(zn, zm); // svtblq_f16, or svtblq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svtblq_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tblq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z15test_svtblq_f32u13__SVFloat32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tblq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svtblq_f32(svfloat32_t zn, svuint32_t zm) {
+  return SVE_ACLE_FUNC(svtblq, _f32,,)(zn, zm); // svtblq_f32, or svtblq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svtblq_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tblq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z15test_svtblq_f64u13__SVFloat64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tblq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svtblq_f64(svfloat64_t zn, svuint64_t zm) {
+  return SVE_ACLE_FUNC(svtblq, _f64,,)(zn, zm); // svtblq_f64, or svtblq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svtblq_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svtblq_bf16u14__SVBFloat16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svtblq_bf16(svbfloat16_t zn, svuint16_t zm) {
+  return SVE_ACLE_FUNC(svtblq, _bf16,,)(zn, zm); // svtblq_bf16, or svtblq when overloaded
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c
new file mode 100644
index 000000000000000..eeb589137d4f6d6
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c
@@ -0,0 +1,214 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// Name-pasting helper taking four pieces; the overloaded form keeps only A1 and A3.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else // Explicit form: paste all four name pieces together.
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif // SVE_OVERLOADED_FORMS
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svtbxq_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtbxq_u8u11__SVUint8_tu11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svtbxq_u8(svuint8_t passthru, svuint8_t zn, svuint8_t zm) {
+  return SVE_ACLE_FUNC(svtbxq, _u8,,)(passthru, zn, zm); // svtbxq_u8, or svtbxq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svtbxq_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[PASSTHRU:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtbxq_u16u12__SVUint16_tu12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[PASSTHRU:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svtbxq_u16(svuint16_t passthru, svuint16_t zn, svuint16_t zm) {
+  return SVE_ACLE_FUNC(svtbxq, _u16,,)(passthru, zn, zm); // svtbxq_u16, or svtbxq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svtbxq_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtbxq_u32u12__SVUint32_tu12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svtbxq_u32(svuint32_t passthru, svuint32_t zn, svuint32_t zm) {
+  return SVE_ACLE_FUNC(svtbxq, _u32,,)(passthru, zn, zm); // svtbxq_u32, or svtbxq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svtbxq_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtbxq_u64u12__SVUint64_tu12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svtbxq_u64(svuint64_t passthru, svuint64_t zn, svuint64_t zm) {
+  return SVE_ACLE_FUNC(svtbxq, _u64,,)(passthru, zn, zm); // svtbxq_u64, or svtbxq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svtbxq_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtbxq_s8u10__SVInt8_tu10__SVInt8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svtbxq_s8(svint8_t passthru, svint8_t zn, svuint8_t zm) {
+  return SVE_ACLE_FUNC(svtbxq, _s8,,)(passthru, zn, zm); // svtbxq_s8, or svtbxq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svtbxq_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[PASSTHRU:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtbxq_s16u11__SVInt16_tu11__SVInt16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[PASSTHRU:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svtbxq_s16(svint16_t passthru, svint16_t zn, svuint16_t zm) {
+  return SVE_ACLE_FUNC(svtbxq, _s16,,)(passthru, zn, zm); // svtbxq_s16, or svtbxq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svtbxq_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtbxq_s32u11__SVInt32_tu11__SVInt32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svtbxq_s32(svint32_t passthru, svint32_t zn, svuint32_t zm) {
+  return SVE_ACLE_FUNC(svtbxq, _s32,,)(passthru, zn, zm); // svtbxq_s32, or svtbxq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svtbxq_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtbxq_s64u11__SVInt64_tu11__SVInt64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svtbxq_s64(svint64_t passthru, svint64_t zn, svuint64_t zm) {
+  return SVE_ACLE_FUNC(svtbxq, _s64,,)(passthru, zn, zm); // svtbxq_s64, or svtbxq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svtbxq_f16
+// CHECK-SAME: (<vscale x 8 x half> [[PASSTHRU:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half> [[PASSTHRU]], <vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z15test_svtbxq_f16u13__SVFloat16_tu13__SVFloat16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[PASSTHRU:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half> [[PASSTHRU]], <vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svtbxq_f16(svfloat16_t passthru, svfloat16_t zn, svuint16_t zm) {
+  return SVE_ACLE_FUNC(svtbxq, _f16,,)(passthru, zn, zm); // svtbxq_f16, or svtbxq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svtbxq_f32
+// CHECK-SAME: (<vscale x 4 x float> [[PASSTHRU:%.*]], <vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float> [[PASSTHRU]], <vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z15test_svtbxq_f32u13__SVFloat32_tu13__SVFloat32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[PASSTHRU:%.*]], <vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float> [[PASSTHRU]], <vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svtbxq_f32(svfloat32_t passthru, svfloat32_t zn, svuint32_t zm) {
+  return SVE_ACLE_FUNC(svtbxq, _f32,,)(passthru, zn, zm); // svtbxq_f32, or svtbxq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svtbxq_f64
+// CHECK-SAME: (<vscale x 2 x double> [[PASSTHRU:%.*]], <vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double> [[PASSTHRU]], <vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z15test_svtbxq_f64u13__SVFloat64_tu13__SVFloat64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[PASSTHRU:%.*]], <vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double> [[PASSTHRU]], <vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svtbxq_f64(svfloat64_t passthru, svfloat64_t zn, svuint64_t zm) {
+  return SVE_ACLE_FUNC(svtbxq, _f64,,)(passthru, zn, zm); // svtbxq_f64, or svtbxq when overloaded
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svtbxq_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[PASSTHRU:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat> [[PASSTHRU]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svtbxq_bf16u14__SVBFloat16_tu14__SVBFloat16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[PASSTHRU:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat> [[PASSTHRU]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svtbxq_bf16(svbfloat16_t passthru, svbfloat16_t zn, svuint16_t zm) {
+  return SVE_ACLE_FUNC(svtbxq, _bf16,,)(passthru, zn, zm); // svtbxq_bf16, or svtbxq when overloaded
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
new file mode 100644
index 000000000000000..0773f8d8d01966a
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
@@ -0,0 +1,217 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// Builds a builtin name from four pieces: the overloaded build pastes only A1 and A3 (A2/A4 are dropped); otherwise all four are pasted.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq1_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq1_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svuzpq1_u8(svuint8_t zn, svuint8_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_u8,,)(zn, zm); // svuzpq1_u8, or overloaded svuzpq1 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq1_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq1_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svuzpq1_u16(svuint16_t zn, svuint16_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_u16,,)(zn, zm); // svuzpq1_u16, or overloaded svuzpq1 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq1_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq1_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svuzpq1_u32(svuint32_t zn, svuint32_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_u32,,)(zn, zm); // svuzpq1_u32, or overloaded svuzpq1 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq1_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq1_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svuzpq1_u64(svuint64_t zn, svuint64_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_u64,,)(zn, zm); // svuzpq1_u64, or overloaded svuzpq1 when SVE_OVERLOADED_FORMS is defined
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq1_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq1_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svuzpq1_s8(svint8_t zn, svint8_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_s8,,)(zn, zm); // svuzpq1_s8, or overloaded svuzpq1 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq1_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq1_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svuzpq1_s16(svint16_t zn, svint16_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_s16,,)(zn, zm); // svuzpq1_s16, or overloaded svuzpq1 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq1_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq1_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svuzpq1_s32(svint32_t zn, svint32_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_s32,,)(zn, zm); // svuzpq1_s32, or overloaded svuzpq1 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq1_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq1_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svuzpq1_s64(svint64_t zn, svint64_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_s64,,)(zn, zm); // svuzpq1_s64, or overloaded svuzpq1 when SVE_OVERLOADED_FORMS is defined
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svuzpq1_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svuzpq1_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svuzpq1_f16(svfloat16_t zn, svfloat16_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_f16,,)(zn, zm); // svuzpq1_f16, or overloaded svuzpq1 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svuzpq1_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svuzpq1_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svuzpq1_f32(svfloat32_t zn, svfloat32_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_f32,,)(zn, zm); // svuzpq1_f32, or overloaded svuzpq1 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svuzpq1_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svuzpq1_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svuzpq1_f64(svfloat64_t zn, svfloat64_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_f64,,)(zn, zm); // svuzpq1_f64, or overloaded svuzpq1 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svuzpq1_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svuzpq1_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svuzpq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+    return SVE_ACLE_FUNC(svuzpq1,_bf16,,)(zn, zm); // svuzpq1_bf16, or overloaded svuzpq1 when SVE_OVERLOADED_FORMS is defined
+}
+
+
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
new file mode 100644
index 000000000000000..9883a7ef21196a3
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
@@ -0,0 +1,216 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// Builds a builtin name from four pieces: the overloaded build pastes only A1 and A3 (A2/A4 are dropped); otherwise all four are pasted.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq2_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq2_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svuzpq2_u8(svuint8_t zn, svuint8_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_u8,,)(zn, zm); // svuzpq2_u8, or overloaded svuzpq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq2_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq2_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svuzpq2_u16(svuint16_t zn, svuint16_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_u16,,)(zn, zm); // svuzpq2_u16, or overloaded svuzpq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq2_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq2_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svuzpq2_u32(svuint32_t zn, svuint32_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_u32,,)(zn, zm); // svuzpq2_u32, or overloaded svuzpq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq2_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq2_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svuzpq2_u64(svuint64_t zn, svuint64_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_u64,,)(zn, zm); // svuzpq2_u64, or overloaded svuzpq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq2_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq2_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svuzpq2_s8(svint8_t zn, svint8_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_s8,,)(zn, zm); // svuzpq2_s8, or overloaded svuzpq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq2_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq2_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svuzpq2_s16(svint16_t zn, svint16_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_s16,,)(zn, zm); // svuzpq2_s16, or overloaded svuzpq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq2_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq2_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svuzpq2_s32(svint32_t zn, svint32_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_s32,,)(zn, zm); // svuzpq2_s32, or overloaded svuzpq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq2_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq2_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svuzpq2_s64(svint64_t zn, svint64_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_s64,,)(zn, zm); // svuzpq2_s64, or overloaded svuzpq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svuzpq2_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svuzpq2_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svuzpq2_f16(svfloat16_t zn, svfloat16_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_f16,,)(zn, zm); // svuzpq2_f16, or overloaded svuzpq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svuzpq2_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svuzpq2_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svuzpq2_f32(svfloat32_t zn, svfloat32_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_f32,,)(zn, zm); // svuzpq2_f32, or overloaded svuzpq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svuzpq2_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svuzpq2_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svuzpq2_f64(svfloat64_t zn, svfloat64_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_f64,,)(zn, zm); // svuzpq2_f64, or overloaded svuzpq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svuzpq2_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svuzpq2_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svuzpq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+    return SVE_ACLE_FUNC(svuzpq2,_bf16,,)(zn, zm); // svuzpq2_bf16, or overloaded svuzpq2 when SVE_OVERLOADED_FORMS is defined
+}
+
+
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
new file mode 100644
index 000000000000000..c7a1a9b2c227f7a
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
@@ -0,0 +1,217 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq1_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq1_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svzipq1_u8(svuint8_t zn, svuint8_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_u8,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq1_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq1_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svzipq1_u16(svuint16_t zn, svuint16_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_u16,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq1_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq1_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svzipq1_u32(svuint32_t zn, svuint32_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_u32,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq1_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq1_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svzipq1_u64(svuint64_t zn, svuint64_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_u64,,)(zn, zm);
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq1_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq1_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svzipq1_s8(svint8_t zn, svint8_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_s8,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq1_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq1_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svzipq1_s16(svint16_t zn, svint16_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_s16,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq1_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq1_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svzipq1_s32(svint32_t zn, svint32_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_s32,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq1_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq1_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svzipq1_s64(svint64_t zn, svint64_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_s64,,)(zn, zm);
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svzipq1_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svzipq1_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svzipq1_f16(svfloat16_t zn, svfloat16_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_f16,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svzipq1_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svzipq1_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svzipq1_f32(svfloat32_t zn, svfloat32_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_f32,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svzipq1_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svzipq1_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svzipq1_f64(svfloat64_t zn, svfloat64_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_f64,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svzipq1_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svzipq1_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svzipq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+    return SVE_ACLE_FUNC(svzipq1,_bf16,,)(zn, zm);
+}
+
+
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
new file mode 100644
index 000000000000000..220352ece1984b6
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
@@ -0,0 +1,217 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq2_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq2_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svzipq2_u8(svuint8_t zn, svuint8_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_u8,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq2_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq2_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svzipq2_u16(svuint16_t zn, svuint16_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_u16,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq2_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq2_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svzipq2_u32(svuint32_t zn, svuint32_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_u32,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq2_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq2_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svzipq2_u64(svuint64_t zn, svuint64_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_u64,,)(zn, zm);
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq2_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq2_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svzipq2_s8(svint8_t zn, svint8_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_s8,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq2_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq2_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svzipq2_s16(svint16_t zn, svint16_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_s16,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq2_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq2_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svzipq2_s32(svint32_t zn, svint32_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_s32,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq2_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq2_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svzipq2_s64(svint64_t zn, svint64_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_s64,,)(zn, zm);
+}
+
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svzipq2_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svzipq2_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svzipq2_f16(svfloat16_t zn, svfloat16_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_f16,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svzipq2_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svzipq2_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svzipq2_f32(svfloat32_t zn, svfloat32_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_f32,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svzipq2_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svzipq2_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svzipq2_f64(svfloat64_t zn, svfloat64_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_f64,,)(zn, zm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svzipq2_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svzipq2_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svzipq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+    return SVE_ACLE_FUNC(svzipq2,_bf16,,)(zn, zm);
+}
+
+
diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
index c4e087c8b7d79ea..30d60cfe205e7e0 100644
--- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
+++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
@@ -116,3 +116,9 @@ void test_svdot_lane_2way(svint32_t s32, svuint32_t u32, svint16_t s16, svuint16
   svdot_lane_u32_u16_u16(u32, u16, u16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
   svdot_lane_f32_f16_f16(f32, f16, f16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
 }
+
+__attribute__((target("+sve2p1"))) // Negative test: svextq_lane immediates outside [0, 15] must be diagnosed.
+void test_svextq_lane(svint16_t zn_i16, svint16_t zm_i16, svfloat16_t zn_f16, svfloat16_t zm_f16){
+  svextq_lane_s16(zn_i16, zm_i16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
+  svextq_lane_f16(zn_f16, zm_f16, 16);  // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index a42e2c49cb477ba..7d126d13b5a6460 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3455,3 +3455,44 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sve_sel_x4  : SVE2_VG4_Sel_Intrinsic;
 
 }
+
+// SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
+//
+def int_aarch64_sve_zipq1     : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_zipq2     : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_uzpq1     : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_uzpq2     : AdvSIMD_2VectorArg_Intrinsic;
+
+// SVE2.1 - Programmable table lookup within each quadword vector segment
+// (zeroing)/(merging)
+//
+def int_aarch64_sve_tblq : AdvSIMD_SVE_TBL_Intrinsic;
+def int_aarch64_sve_tbxq : AdvSIMD_SVE2_TBX_Intrinsic;
+
+// SVE2.1 - Extract vector segment from each pair of quadword segments.
+//
+def int_aarch64_sve_extq_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
+
+//
+// SVE2.1 - Move predicate to/from vector
+// The to_pred_lane and to_vector_lane_merging forms take a trailing i32 operand that must be an immediate (ImmArg).
+def int_aarch64_sve_pmov_to_pred_lane :
+    DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+                          [llvm_anyvector_ty, llvm_i32_ty],
+                          [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+def int_aarch64_sve_pmov_to_pred_lane_zero :
+    DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+                          [llvm_anyvector_ty],
+                          [IntrNoMem]>;
+
+def int_aarch64_sve_pmov_to_vector_lane_merging :
+    DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                          [LLVMMatchType<0>,
+                          LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty],
+                          [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
+def int_aarch64_sve_pmov_to_vector_lane_zeroing :
+    DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                          [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+                          [IntrNoMem]>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f88f5a240a1fd7f..68e87f491a09e45 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -818,8 +818,11 @@ def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
 
 def Imm0_0Operand : AsmImmRange<0, 0>;
 def Imm0_1Operand : AsmImmRange<0, 1>;
+def Imm1_1Operand : AsmImmRange<1, 1>;
 def Imm0_3Operand : AsmImmRange<0, 3>;
+def Imm1_3Operand : AsmImmRange<1, 3>;
 def Imm0_7Operand : AsmImmRange<0, 7>;
+def Imm1_7Operand : AsmImmRange<1, 7>;
 def Imm0_15Operand : AsmImmRange<0, 15>;
 def Imm0_31Operand : AsmImmRange<0, 31>;
 def Imm0_63Operand : AsmImmRange<0, 63>;
@@ -1035,6 +1038,13 @@ def timm0_1 : Operand<i64>, TImmLeaf<i64, [{
   let ParserMatchClass = Imm0_1Operand;
 }
 
+// timm32_0_0 predicate - True if the 32-bit immediate is in the range [0,0]
+def timm32_0_0 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) == 0;
+}]> {
+  let ParserMatchClass = Imm0_0Operand;
+}
+
 // timm32_0_1 predicate - True if the 32-bit immediate is in the range [0,1]
 def timm32_0_1 : Operand<i32>, TImmLeaf<i32, [{
   return ((uint32_t)Imm) < 2;
@@ -1042,6 +1052,20 @@ def timm32_0_1 : Operand<i32>, TImmLeaf<i32, [{
   let ParserMatchClass = Imm0_1Operand;
 }
 
+// timm32_1_1 predicate - True if the 32-bit immediate is in the range [1,1]
+def timm32_1_1 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) == 1;
+}]> {
+  let ParserMatchClass = Imm1_1Operand;
+}
+
+// timm32_1_3 predicate - Matches a 32-bit target immediate whose value lies in [1,3]
+def timm32_1_3 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) >= 1 && ((uint32_t)Imm) <= 3;
+}]> {
+  let ParserMatchClass = Imm1_3Operand;
+}
+
 // imm0_15 predicate - True if the immediate is in the range [0,15]
 def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
   return ((uint64_t)Imm) < 16;
@@ -1077,6 +1101,13 @@ def timm32_0_7 : Operand<i32>, TImmLeaf<i32, [{
   let ParserMatchClass = Imm0_7Operand;
 }
 
+// timm32_1_7 predicate - Matches a 32-bit target immediate whose value lies in [1,7]
+def timm32_1_7 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) >= 1 && ((uint32_t)Imm) <= 7;
+}]> {
+  let ParserMatchClass = Imm1_7Operand;
+}
+
 // imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
 def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
   return ((uint32_t)Imm) < 16;
@@ -1430,6 +1461,8 @@ let OperandNamespace = "AArch64" in {
   let OperandType = "OPERAND_IMPLICIT_IMM_0" in {
     defm VectorIndex0 : VectorIndex<i64, VectorIndex0Operand,
                                 [{ return ((uint64_t)Imm) == 0; }]>;
+    defm VectorIndex032b : VectorIndex<i32, VectorIndex0Operand,
+                                [{ return ((uint32_t)Imm) == 0; }]>;
   }
 }
 defm VectorIndex1 : VectorIndex<i64, VectorIndex1Operand,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a53973bad92e25f..18a1deb5955c3f0 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3994,10 +3994,10 @@ defm FMAXQV   : sve2p1_fp_reduction_q<0b110, "fmaxqv">;
 defm FMINQV   : sve2p1_fp_reduction_q<0b111, "fminqv">;
 
 defm DUPQ_ZZI : sve2p1_dupq<"dupq">;
-def EXTQ_ZZI : sve2p1_extq<"extq">;
+defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq_lane>;
 
-defm PMOV_PZI : sve2p1_vector_to_pred<"pmov">;
-defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov">;
+defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>;
+defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov", int_aarch64_sve_pmov_to_vector_lane_merging, int_aarch64_sve_pmov_to_vector_lane_zeroing>;
 
 defm ORQV_VPZ   : sve2p1_int_reduce_q<0b1100, "orqv">;
 defm EORQV_VPZ  : sve2p1_int_reduce_q<0b1101, "eorqv">;
@@ -4008,12 +4008,12 @@ defm UMAXQV_VPZ : sve2p1_int_reduce_q<0b0101, "umaxqv">;
 defm SMINQV_VPZ : sve2p1_int_reduce_q<0b0110, "sminqv">;
 defm UMINQV_VPZ : sve2p1_int_reduce_q<0b0111, "uminqv">;
 
-defm TBXQ_ZZZ : sve2_int_perm_tbx<"tbxq", 0b10, null_frag>;
-defm ZIPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b000, "zipq1">;
-defm ZIPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b001, "zipq2">;
-defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1">;
-defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2">;
-defm TBLQ_ZZZ  : sve2p1_tblq<"tblq">;
+defm ZIPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b000, "zipq1", int_aarch64_sve_zipq1>;
+defm ZIPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b001, "zipq2", int_aarch64_sve_zipq2>;
+defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1", int_aarch64_sve_uzpq1>;
+defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2", int_aarch64_sve_uzpq2>;
+defm TBXQ_ZZZ : sve2_int_perm_tbx<"tbxq", 0b10, int_aarch64_sve_tbxq>;
+defm TBLQ_ZZZ  : sve2p1_tblq<"tblq", int_aarch64_sve_tblq>;
 } // End HasSVE2p1_or_HasSME2p1
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index d54be1e406fed95..8dbfe098c7b5d84 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -9913,7 +9913,7 @@ multiclass sve2p1_dupq<string mnemonic> {
 
 // SVE Permute Vector - Quadwords (EXTQ)
 class sve2p1_extq<string mnemonic>
-    : I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn, ZPR8:$Zm, imm0_15:$imm4),
+    : I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn, ZPR8:$Zm, timm32_0_15:$imm4),
         mnemonic, "\t$Zdn, $_Zdn, $Zm, $imm4",
         "", []>, Sched<[]> {
   bits<5> Zdn;
@@ -9931,6 +9931,19 @@ class sve2p1_extq<string mnemonic>
   let hasSideEffects = 0;
 }
 
+multiclass sve2p1_extq<string mnemonic, SDPatternOperator Op> {
+  def NAME : sve2p1_extq<mnemonic>;
+  def : SVE_3_Op_Imm_Pat<nxv16i8, Op, nxv16i8, nxv16i8, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Imm_Pat<nxv8i16, Op, nxv8i16, nxv8i16, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Imm_Pat<nxv4i32, Op, nxv4i32, nxv4i32, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Imm_Pat<nxv2i64, Op, nxv2i64, nxv2i64, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+
+  def : SVE_3_Op_Imm_Pat<nxv8f16, Op, nxv8f16, nxv8f16, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Imm_Pat<nxv4f32, Op, nxv4f32, nxv4f32, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Imm_Pat<nxv2f64, Op, nxv2f64, nxv2f64, i32, timm32_0_15, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Imm_Pat<nxv8bf16, Op, nxv8bf16, nxv8bf16, i32, timm32_0_15, !cast<Instruction>(NAME
+)>;
+}
 
 // SVE move predicate from vector
 class sve2p1_vector_to_pred<bits<4> opc, string mnemonic,
@@ -9952,8 +9965,8 @@ class sve2p1_vector_to_pred<bits<4> opc, string mnemonic,
   let hasSideEffects = 0;
 }
 
-multiclass sve2p1_vector_to_pred<string mnemonic> {
-  def _B : sve2p1_vector_to_pred<{0, 0, 0, 1}, mnemonic, PPR8,  VectorIndex0>;
+multiclass sve2p1_vector_to_pred<string mnemonic, SDPatternOperator Op_lane, SDPatternOperator Op> {
+  def _B : sve2p1_vector_to_pred<{0, 0, 0, 1}, mnemonic, PPR8,  VectorIndex032b>;
   def _H : sve2p1_vector_to_pred<{0, 0, 1, ?}, mnemonic, PPR16, VectorIndexD32b> {
     bits<1> index;
     let Inst{17} = index;
@@ -9970,6 +9983,25 @@ multiclass sve2p1_vector_to_pred<string mnemonic> {
 
   def : InstAlias<mnemonic # "\t$Pd, $Zn",
                  (!cast<Instruction>(NAME # _B) PPR8:$Pd, ZPRAny:$Zn, 0), 1>;
+
+  // Explicit lane: match Op_lane and forward its immediate index operand.
+  def : Pat<(nxv16i1 (Op_lane (nxv16i8 ZPRAny:$Zn), (i32 timm32_0_0:$Idx))),
+            (!cast<Instruction>(NAME # _B) ZPRAny:$Zn, timm32_0_0:$Idx)>;
+  def : Pat<(nxv8i1 (Op_lane (nxv8i16 ZPRAny:$Zn), (i32 timm32_0_1:$Idx))),
+            (!cast<Instruction>(NAME # _H) ZPRAny:$Zn, timm32_0_1:$Idx)>;
+  def : Pat<(nxv4i1 (Op_lane (nxv4i32 ZPRAny:$Zn), (i32 timm32_0_3:$Idx))),
+            (!cast<Instruction>(NAME # _S) ZPRAny:$Zn, timm32_0_3:$Idx)>;
+  def : Pat<(nxv2i1 (Op_lane (nxv2i64 ZPRAny:$Zn), (i32 timm32_0_7:$Idx))),
+            (!cast<Instruction>(NAME # _D) ZPRAny:$Zn, timm32_0_7:$Idx)>;
+  // Implicit lane 0: match the index-less Op and emit a literal index of 0.
+ def : Pat<(nxv16i1 (Op (nxv16i8 ZPRAny:$Zn))),
+            (!cast<Instruction>(NAME # _B) ZPRAny:$Zn, 0)>;
+  def : Pat<(nxv8i1 (Op (nxv8i16 ZPRAny:$Zn))),
+            (!cast<Instruction>(NAME # _H) ZPRAny:$Zn, 0)>;
+  def : Pat<(nxv4i1 (Op (nxv4i32 ZPRAny:$Zn))),
+            (!cast<Instruction>(NAME # _S) ZPRAny:$Zn, 0)>;
+  def : Pat<(nxv2i1 (Op (nxv2i64 ZPRAny:$Zn))),
+            (!cast<Instruction>(NAME # _D) ZPRAny:$Zn, 0)>;
 }
 
 
@@ -9993,7 +10025,8 @@ class sve2p1_pred_to_vector<bits<4> opc, string mnemonic,
   let hasSideEffects = 0;
 }
 
-multiclass sve2p1_pred_to_vector<string mnemonic> {
+multiclass sve2p1_pred_to_vector<string mnemonic, SDPatternOperator MergeOp,
+                                 SDPatternOperator ZeroOp> {
   def _B : sve2p1_pred_to_vector<{0, 0, 0, 1}, mnemonic, PPR8,  VectorIndex0>;
   def _H : sve2p1_pred_to_vector<{0, 0, 1, ?}, mnemonic, PPR16, VectorIndexD32b> {
     bits<1> index;
@@ -10011,6 +10044,24 @@ multiclass sve2p1_pred_to_vector<string mnemonic> {
 
   def : InstAlias<mnemonic # "\t$Zd, $Pn",
                  (!cast<Instruction>(NAME # _B) ZPRAny:$Zd, 0, PPR8:$Pn), 1>;
+
+  // Merge: MergeOp supplies $Zd as an input along with an immediate index.
+  def : Pat<(nxv8i16 (MergeOp (nxv8i16 ZPRAny:$Zd), (nxv8i1 PPR16:$Pn), (i32 timm32_1_1:$Idx))),
+            (!cast<Instruction>(NAME # _H) ZPRAny:$Zd, timm32_1_1:$Idx, PPR16:$Pn)>;
+  def : Pat<(nxv4i32 (MergeOp (nxv4i32 ZPRAny:$Zd), (nxv4i1 PPR32:$Pn), (i32 timm32_1_3:$Idx))),
+            (!cast<Instruction>(NAME # _S) ZPRAny:$Zd, timm32_1_3:$Idx, PPR32:$Pn)>;
+  def : Pat<(nxv2i64 (MergeOp (nxv2i64 ZPRAny:$Zd), (nxv2i1 PPR64:$Pn), (i32 timm32_1_7:$Idx))),
+            (!cast<Instruction>(NAME # _D) ZPRAny:$Zd, timm32_1_7:$Idx, PPR64:$Pn)>;
+
+  // Zero: ZeroOp has no $Zd input or index; use IMPLICIT_DEF and index 0.
+  def : Pat<(nxv16i8 (ZeroOp (nxv16i1 PPR8:$Pn))),
+           (!cast<Instruction>(NAME # _B) (IMPLICIT_DEF), 0, PPR8:$Pn)>;
+  def : Pat<(nxv8i16 (ZeroOp (nxv8i1 PPR16:$Pn))),
+            (!cast<Instruction>(NAME # _H) (IMPLICIT_DEF), 0, PPR16:$Pn)>;
+  def : Pat<(nxv4i32 (ZeroOp (nxv4i1 PPR32:$Pn))),
+            (!cast<Instruction>(NAME # _S) (IMPLICIT_DEF), 0, PPR32:$Pn)>;
+  def : Pat<(nxv2i64 (ZeroOp (nxv2i1 PPR64:$Pn))),
+            (!cast<Instruction>(NAME # _D) (IMPLICIT_DEF), 0, PPR64:$Pn)>;
 }
 
 
@@ -10066,18 +10117,41 @@ class sve2p1_permute_vec_elems_q<bits<2> sz, bits<3> opc, string mnemonic,
   let hasSideEffects = 0;
 }
 
-multiclass sve2p1_permute_vec_elems_q<bits<3> opc, string mnemonic> {
+multiclass sve2p1_permute_vec_elems_q<bits<3> opc, string mnemonic,
+                                      SDPatternOperator op> {
   def _B : sve2p1_permute_vec_elems_q<0b00, opc, mnemonic, ZPR8,  ZPR8>;
   def _H : sve2p1_permute_vec_elems_q<0b01, opc, mnemonic, ZPR16, ZPR16>;
   def _S : sve2p1_permute_vec_elems_q<0b10, opc, mnemonic, ZPR32, ZPR32>;
   def _D : sve2p1_permute_vec_elems_q<0b11, opc, mnemonic, ZPR64, ZPR64>;
+
+  def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+  def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+  def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
 }
 
-multiclass sve2p1_tblq<string mnemonic> {
+multiclass sve2p1_tblq<string mnemonic, SDPatternOperator op> {
   def _B : sve2p1_permute_vec_elems_q<0b00, 0b110, mnemonic, ZPR8,  Z_b>;
   def _H : sve2p1_permute_vec_elems_q<0b01, 0b110, mnemonic, ZPR16, Z_h>;
   def _S : sve2p1_permute_vec_elems_q<0b10, 0b110, mnemonic, ZPR32, Z_s>;
   def _D : sve2p1_permute_vec_elems_q<0b11, 0b110, mnemonic, ZPR64, Z_d>;
+
+  def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+  def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+  def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
new file mode 100644
index 000000000000000..efe19432f9c32e9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_extq_i8 (<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_extq_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #0
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32 0)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_extq_i16 (<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_extq_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #1
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32 1)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_extq_i32 (<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_extq_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #2
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm, i32 2)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_extq_i64 (<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_extq_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #3
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm, i32 3)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_extq_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: test_extq_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #4
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm, i32 4)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_extq_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
+; CHECK-LABEL: test_extq_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #5
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm, i32 5)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_extq_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
+; CHECK-LABEL: test_extq_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #6
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm, i32 6)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_extq_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: test_extq_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #15
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 15)
+  ret <vscale x 8 x bfloat> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+declare <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll
new file mode 100644
index 000000000000000..7cae1d2c216b616
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
+
+define <vscale x 16 x i1> @test_pmov_to_pred_i8(<vscale x 16 x i8> %zn) {
+; CHECK-LABEL: test_pmov_to_pred_i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  entry:
+  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> %zn, i32 0)
+  ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @test_pmov_to_pred_i16(<vscale x 8 x i16> %zn) {
+; CHECK-LABEL: test_pmov_to_pred_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-2
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str z8, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    mov z8.d, z0.d
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16
+; CHECK-NEXT:    mov z0.d, z8.d
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov p4.b, p0.b
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16
+; CHECK-NEXT:    ptrue p1.h
+; CHECK-NEXT:    ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    eor p0.b, p1/z, p4.b, p0.b
+; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    addvl sp, sp, #2
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  entry:
+  %res1 = call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> %zn, i32 0)
+  %res2 = call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> %zn, i32 1)
+
+  %res = add <vscale x 8 x i1> %res1, %res2
+  ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @test_pmov_to_pred_i32(<vscale x 4 x i32> %zn) {
+; CHECK-LABEL: test_pmov_to_pred_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-2
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str z8, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    mov z8.d, z0.d
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32
+; CHECK-NEXT:    mov z0.d, z8.d
+; CHECK-NEXT:    mov w0, #3 // =0x3
+; CHECK-NEXT:    mov p4.b, p0.b
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    eor p0.b, p1/z, p4.b, p0.b
+; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    addvl sp, sp, #2
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  entry:
+  %res1 = call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> %zn, i32 0)
+  %res2 = call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> %zn, i32 3)
+
+  %res = add <vscale x 4 x i1> %res1, %res2
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @test_pmov_to_pred_i64(<vscale x 2 x i64> %zn) {
+; CHECK-LABEL: test_pmov_to_pred_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-2
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str z8, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    mov z8.d, z0.d
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64
+; CHECK-NEXT:    mov z0.d, z8.d
+; CHECK-NEXT:    mov w0, #7 // =0x7
+; CHECK-NEXT:    mov p4.b, p0.b
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    eor p0.b, p1/z, p4.b, p0.b
+; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    addvl sp, sp, #2
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  entry:
+  %res1 = call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> %zn, i32 0)
+  %res2 = call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> %zn, i32 7)
+
+  %res = add <vscale x 2 x i1> %res1, %res2
+  ret <vscale x 2 x i1> %res
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8>, i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64>, i32)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-vector.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-vector.ll
new file mode 100644
index 000000000000000..58b240b0fbd6806
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-vector.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
+
+; Merge
+
+define <vscale x 8 x i16> @test_pmov_to_vector_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  entry:
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i1> %pn, i32 1)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_pmov_to_vector_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov w0, #3 // =0x3
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  entry:
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i1> %pn, i32 3)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_pmov_to_vector_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov w0, #7 // =0x7
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  entry:
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i1> %pn, i32 7)
+  ret <vscale x 2 x i64> %res
+}
+
+
+; Zero
+
+define <vscale x 16 x i8> @test_pmov_to_vector_zero_i8(<vscale x 16 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_zero_i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  entry:
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> %pn)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_pmov_to_vector_zero_i16(<vscale x 8 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_zero_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  entry:
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> %pn)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_pmov_to_vector_zero_i32(<vscale x 4 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_zero_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  entry:
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> %pn)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_pmov_to_vector_zero_i64(<vscale x 2 x i1> %pn) {
+; CHECK-LABEL: test_pmov_to_vector_zero_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  entry:
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> %pn)
+  ret <vscale x 2 x i64> %res
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll
new file mode 100644
index 000000000000000..cc9bbcfe47102b2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_tblq_i8 (<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_tblq_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tblq z0.b, { z0.b }, z1.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_tblq_i16 (<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_tblq_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tblq z0.h, { z0.h }, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_tblq_i32 (<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_tblq_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tblq z0.s, { z0.s }, z1.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_tblq_i64 (<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_tblq_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tblq z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_tblq_f16(<vscale x 8 x half> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_tblq_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tblq z0.h, { z0.h }, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.tblq.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x i16> %zm)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_tblq_f32(<vscale x 4 x float> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_tblq_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tblq z0.s, { z0.s }, z1.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.tblq.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x i32> %zm)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_tblq_f64(<vscale x 2 x double> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_tblq_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tblq z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.tblq.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x i64> %zm)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_tblq_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_tblq_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tblq z0.h, { z0.h }, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x i16> %zm)
+  ret <vscale x 8 x bfloat> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.tblq.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.tblq.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.tblq.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i16>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll
new file mode 100644
index 000000000000000..29265b5d02e5188
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_tbxq_i8 (<vscale x 16 x i8> %passthru, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_tbxq_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbxq z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> %passthru, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_tbxq_i16 (<vscale x 8 x i16> %passthru, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_tbxq_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbxq z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> %passthru, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_tbxq_i32 (<vscale x 4 x i32>  %passthru, <vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_tbxq_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbxq z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32>  %passthru, <vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_tbxq_i64 (<vscale x 2 x i64>  %passthru, <vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_tbxq_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbxq z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64>  %passthru, <vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_tbxq_f16(<vscale x 8 x half>  %passthru, <vscale x 8 x half> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_tbxq_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbxq z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half>  %passthru, <vscale x 8 x half> %zn, <vscale x 8 x i16> %zm)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_tbxq_f32(<vscale x 4 x float>  %passthru, <vscale x 4 x float> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_tbxq_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbxq z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float>  %passthru, <vscale x 4 x float> %zn, <vscale x 4 x i32> %zm)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_tbxq_f64(<vscale x 2 x double>  %passthru, <vscale x 2 x double> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_tbxq_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbxq z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double>  %passthru, <vscale x 2 x double> %zn, <vscale x 2 x i64> %zm)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_tbxq_bf16(<vscale x 8 x bfloat> %passthru, <vscale x 8 x bfloat> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_tbxq_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbxq z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat> %passthru, <vscale x 8 x bfloat> %zn, <vscale x 8 x i16> %zm)
+  ret <vscale x 8 x bfloat> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i16>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i32>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i64>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i16>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll
new file mode 100644
index 000000000000000..7a83b1500846285
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_uzpq1_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_uzpq1_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_uzpq1_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_uzpq1_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_uzpq1_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_uzpq1_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
+  ret <vscale x 4 x i32>  %res
+}
+
+define <vscale x 2 x i64> @test_uzpq1_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_uzpq1_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_uzpq1_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: test_uzpq1_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_uzpq1_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
+; CHECK-LABEL: test_uzpq1_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+  ret <vscale x 4 x float>  %res
+}
+
+define <vscale x 2 x double> @test_uzpq1_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
+; CHECK-LABEL: test_uzpq1_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_uzpq1_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: test_uzpq1_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  ret <vscale x 8 x bfloat> %res
+}
+
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll
new file mode 100644
index 000000000000000..8695f1c3e1ce959
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_uzpq2_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_uzpq2_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq2 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_uzpq2_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_uzpq2_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_uzpq2_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_uzpq2_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
+  ret <vscale x 4 x i32>  %res
+}
+
+define <vscale x 2 x i64> @test_uzpq2_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_uzpq2_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_uzpq2_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: test_uzpq2_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_uzpq2_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
+; CHECK-LABEL: test_uzpq2_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+  ret <vscale x 4 x float>  %res
+}
+
+define <vscale x 2 x double> @test_uzpq2_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
+; CHECK-LABEL: test_uzpq2_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_uzpq2_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: test_uzpq2_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzpq2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  ret <vscale x 8 x bfloat> %res
+}
+
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll
new file mode 100644
index 000000000000000..89383c1a3f230e8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_zipq1_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_zipq1_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_zipq1_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_zipq1_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_zipq1_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_zipq1_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
+  ret <vscale x 4 x i32>  %res
+}
+
+define <vscale x 2 x i64> @test_zipq1_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_zipq1_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_zipq1_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: test_zipq1_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_zipq1_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
+; CHECK-LABEL: test_zipq1_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+  ret <vscale x 4 x float>  %res
+}
+
+define <vscale x 2 x double> @test_zipq1_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
+; CHECK-LABEL: test_zipq1_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_zipq1_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: test_zipq1_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  ret <vscale x 8 x bfloat> %res
+}
+
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll
new file mode 100644
index 000000000000000..c9aaae3371e80ef
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_zipq2_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: test_zipq2_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq2 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_zipq2_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: test_zipq2_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_zipq2_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: test_zipq2_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
+  ret <vscale x 4 x i32>  %res
+}
+
+define <vscale x 2 x i64> @test_zipq2_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: test_zipq2_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_zipq2_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: test_zipq2_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_zipq2_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
+; CHECK-LABEL: test_zipq2_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+  ret <vscale x 4 x float>  %res
+}
+
+define <vscale x 2 x double> @test_zipq2_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
+; CHECK-LABEL: test_zipq2_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_zipq2_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: test_zipq2_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zipq2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  ret <vscale x 8 x bfloat> %res
+}
+
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)

>From aa4911dbadc8e80f45df267f88eb17e4a61356ba Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Fri, 10 Nov 2023 12:45:00 +0000
Subject: [PATCH 2/4] Update the clang tests

---
 .../acle_sve2p1_extq.c                        | 34 +++++++++----------
 .../acle_sve2p1_tblq.c                        | 20 +++++------
 .../acle_sve2p1_tbxq.c                        | 34 +++++++++----------
 .../acle_sve2p1_uzpq1.c                       | 34 +++++++++----------
 .../acle_sve2p1_uzpq2.c                       | 34 +++++++++----------
 .../acle_sve2p1_zipq1.c                       | 34 +++++++++----------
 .../acle_sve2p1_zipq2.c                       | 34 +++++++++----------
 7 files changed, 112 insertions(+), 112 deletions(-)

diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
index c49f8c838ace373..7704db5667a2a7f 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
@@ -1,14 +1,14 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 
 #include <arm_sve.h>
@@ -26,7 +26,7 @@
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_u8u11__SVUint8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
@@ -42,7 +42,7 @@ svuint8_t test_svextq_lane_u8(svuint8_t zn, svuint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_s8u10__SVInt8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
@@ -58,7 +58,7 @@ svint8_t test_svextq_lane_s8(svint8_t zn, svint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_u16u12__SVUint16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
@@ -74,7 +74,7 @@ svuint16_t test_svextq_lane_u16(svuint16_t zn, svuint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_s16u11__SVInt16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
@@ -90,7 +90,7 @@ svint16_t test_svextq_lane_s16(svint16_t zn, svint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_u32u12__SVUint32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
@@ -106,7 +106,7 @@ svuint32_t test_svextq_lane_u32(svuint32_t zn, svuint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_s32u11__SVInt32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
@@ -122,7 +122,7 @@ svint32_t test_svextq_lane_s32(svint32_t zn, svint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_u64u12__SVUint64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
@@ -138,7 +138,7 @@ svuint64_t test_svextq_lane_u64(svuint64_t zn, svuint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_s64u11__SVInt64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
@@ -154,7 +154,7 @@ svint64_t test_svextq_lane_s64(svint64_t zn, svint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svextq_lane_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svextq_lane_f16u13__SVFloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
@@ -170,7 +170,7 @@ svfloat16_t test_svextq_lane_f16(svfloat16_t zn, svfloat16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svextq_lane_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svextq_lane_f32u13__SVFloat32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
@@ -186,7 +186,7 @@ svfloat32_t test_svextq_lane_f32(svfloat32_t zn, svfloat32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svextq_lane_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svextq_lane_f64u13__SVFloat64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
@@ -202,7 +202,7 @@ svfloat64_t test_svextq_lane_f64(svfloat64_t zn, svfloat64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svextq_lane_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svextq_lane_bf16u14__SVBfloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c
index 56e95d1abace317..6c04413c238a62b 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tblq.c
@@ -1,14 +1,14 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 
@@ -27,7 +27,7 @@
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtblq_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtblq_u8u11__SVUint8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -43,7 +43,7 @@ svuint8_t test_svtblq_u8(svuint8_t zn, svuint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtblq_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtblq_u16u12__SVUint16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -59,7 +59,7 @@ svuint16_t test_svtblq_u16(svuint16_t zn, svuint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtblq_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtblq_u32u12__SVUint32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -75,7 +75,7 @@ svuint32_t test_svtblq_u32(svuint32_t zn, svuint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtblq_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtblq_u64u12__SVUint64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -203,7 +203,7 @@ svfloat64_t test_svtblq_f64(svfloat64_t zn, svuint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svtblq_bf16u14__SVBFloat16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svtblq_bf16u14__SVBfloat16_tu12__SVUint16_t
 // CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c
index eeb589137d4f6d6..0ad7107b676709d 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_tbxq.c
@@ -1,14 +1,14 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 
@@ -27,7 +27,7 @@
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtbxq_u8u11__SVUint8_tu11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtbxq_u8u11__SVUint8_tS_S_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -43,7 +43,7 @@ svuint8_t test_svtbxq_u8(svuint8_t passthru, svuint8_t zn, svuint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtbxq_u16u12__SVUint16_tu12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtbxq_u16u12__SVUint16_tS_S_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[PASSTHRU:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -59,7 +59,7 @@ svuint16_t test_svtbxq_u16(svuint16_t passthru, svuint16_t zn, svuint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtbxq_u32u12__SVUint32_tu12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtbxq_u32u12__SVUint32_tS_S_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -75,7 +75,7 @@ svuint32_t test_svtbxq_u32(svuint32_t passthru, svuint32_t zn, svuint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtbxq_u64u12__SVUint64_tu12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtbxq_u64u12__SVUint64_tS_S_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -91,7 +91,7 @@ svuint64_t test_svtbxq_u64(svuint64_t passthru, svuint64_t zn, svuint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtbxq_s8u10__SVInt8_tu10__SVInt8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z14test_svtbxq_s8u10__SVInt8_tS_u11__SVUint8_t
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -107,7 +107,7 @@ svint8_t test_svtbxq_s8(svint8_t passthru, svint8_t zn, svuint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtbxq_s16u11__SVInt16_tu11__SVInt16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z15test_svtbxq_s16u11__SVInt16_tS_u12__SVUint16_t
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[PASSTHRU:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -123,7 +123,7 @@ svint16_t test_svtbxq_s16(svint16_t passthru, svint16_t zn, svuint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtbxq_s32u11__SVInt32_tu11__SVInt32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svtbxq_s32u11__SVInt32_tS_u12__SVUint32_t
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -139,7 +139,7 @@ svint32_t test_svtbxq_s32(svint32_t passthru, svint32_t zn, svuint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtbxq_s64u11__SVInt64_tu11__SVInt64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svtbxq_s64u11__SVInt64_tS_u12__SVUint64_t
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -155,7 +155,7 @@ svint64_t test_svtbxq_s64(svint64_t passthru, svint64_t zn, svuint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half> [[PASSTHRU]], <vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z15test_svtbxq_f16u13__SVFloat16_tu13__SVFloat16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z15test_svtbxq_f16u13__SVFloat16_tS_u12__SVUint16_t
 // CPP-CHECK-SAME: (<vscale x 8 x half> [[PASSTHRU:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half> [[PASSTHRU]], <vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -171,7 +171,7 @@ svfloat16_t test_svtbxq_f16(svfloat16_t passthru, svfloat16_t zn, svuint16_t zm)
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float> [[PASSTHRU]], <vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z15test_svtbxq_f32u13__SVFloat32_tu13__SVFloat32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z15test_svtbxq_f32u13__SVFloat32_tS_u12__SVUint32_t
 // CPP-CHECK-SAME: (<vscale x 4 x float> [[PASSTHRU:%.*]], <vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float> [[PASSTHRU]], <vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -187,7 +187,7 @@ svfloat32_t test_svtbxq_f32(svfloat32_t passthru, svfloat32_t zn, svuint32_t zm)
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double> [[PASSTHRU]], <vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z15test_svtbxq_f64u13__SVFloat64_tu13__SVFloat64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z15test_svtbxq_f64u13__SVFloat64_tS_u12__SVUint64_t
 // CPP-CHECK-SAME: (<vscale x 2 x double> [[PASSTHRU:%.*]], <vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double> [[PASSTHRU]], <vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -203,7 +203,7 @@ svfloat64_t test_svtbxq_f64(svfloat64_t passthru, svfloat64_t zn, svuint64_t zm)
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat> [[PASSTHRU]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svtbxq_bf16u14__SVBFloat16_tu14__SVBFloat16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svtbxq_bf16u14__SVBfloat16_tS_u12__SVUint16_t
 // CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[PASSTHRU:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat> [[PASSTHRU]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
index 0773f8d8d01966a..b0e2e4b9c7a5981 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
@@ -1,14 +1,14 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
@@ -26,7 +26,7 @@
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq1_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq1_u8u11__SVUint8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -42,7 +42,7 @@ svuint8_t test_svuzpq1_u8(svuint8_t zn, svuint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq1_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq1_u16u12__SVUint16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -58,7 +58,7 @@ svuint16_t test_svuzpq1_u16(svuint16_t zn, svuint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq1_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq1_u32u12__SVUint32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -74,7 +74,7 @@ svuint32_t test_svuzpq1_u32(svuint32_t zn, svuint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq1_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq1_u64u12__SVUint64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -91,7 +91,7 @@ svuint64_t test_svuzpq1_u64(svuint64_t zn, svuint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq1_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq1_s8u10__SVInt8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -107,7 +107,7 @@ svint8_t test_svuzpq1_s8(svint8_t zn, svint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq1_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq1_s16u11__SVInt16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -123,7 +123,7 @@ svint16_t test_svuzpq1_s16(svint16_t zn, svint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq1_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq1_s32u11__SVInt32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -139,7 +139,7 @@ svint32_t test_svuzpq1_s32(svint32_t zn, svint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq1_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq1_s64u11__SVInt64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -156,7 +156,7 @@ svint64_t test_svuzpq1_s64(svint64_t zn, svint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svuzpq1_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svuzpq1_f16u13__SVFloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
@@ -172,7 +172,7 @@ svfloat16_t test_svuzpq1_f16(svfloat16_t zn, svfloat16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svuzpq1_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svuzpq1_f32u13__SVFloat32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
@@ -188,7 +188,7 @@ svfloat32_t test_svuzpq1_f32(svfloat32_t zn, svfloat32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svuzpq1_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svuzpq1_f64u13__SVFloat64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
@@ -204,7 +204,7 @@ svfloat64_t test_svuzpq1_f64(svfloat64_t zn, svfloat64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svuzpq1_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svuzpq1_bf16u14__SVBfloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
index 9883a7ef21196a3..154b4564f9c6e82 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
@@ -1,14 +1,14 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
@@ -26,7 +26,7 @@
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq2_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq2_u8u11__SVUint8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -42,7 +42,7 @@ svuint8_t test_svuzpq2_u8(svuint8_t zn, svuint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq2_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq2_u16u12__SVUint16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -58,7 +58,7 @@ svuint16_t test_svuzpq2_u16(svuint16_t zn, svuint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq2_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq2_u32u12__SVUint32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -74,7 +74,7 @@ svuint32_t test_svuzpq2_u32(svuint32_t zn, svuint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq2_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq2_u64u12__SVUint64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -90,7 +90,7 @@ svuint64_t test_svuzpq2_u64(svuint64_t zn, svuint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq2_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svuzpq2_s8u10__SVInt8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -106,7 +106,7 @@ svint8_t test_svuzpq2_s8(svint8_t zn, svint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq2_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svuzpq2_s16u11__SVInt16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -122,7 +122,7 @@ svint16_t test_svuzpq2_s16(svint16_t zn, svint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq2_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svuzpq2_s32u11__SVInt32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -138,7 +138,7 @@ svint32_t test_svuzpq2_s32(svint32_t zn, svint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq2_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svuzpq2_s64u11__SVInt64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -155,7 +155,7 @@ svint64_t test_svuzpq2_s64(svint64_t zn, svint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svuzpq2_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svuzpq2_f16u13__SVFloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
@@ -171,7 +171,7 @@ svfloat16_t test_svuzpq2_f16(svfloat16_t zn, svfloat16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svuzpq2_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svuzpq2_f32u13__SVFloat32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
@@ -187,7 +187,7 @@ svfloat32_t test_svuzpq2_f32(svfloat32_t zn, svfloat32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svuzpq2_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svuzpq2_f64u13__SVFloat64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
@@ -203,7 +203,7 @@ svfloat64_t test_svuzpq2_f64(svfloat64_t zn, svfloat64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svuzpq2_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svuzpq2_bf16u14__SVBfloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
index c7a1a9b2c227f7a..d96c7eda374d896 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
@@ -1,14 +1,14 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
@@ -26,7 +26,7 @@
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq1_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq1_u8u11__SVUint8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -42,7 +42,7 @@ svuint8_t test_svzipq1_u8(svuint8_t zn, svuint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq1_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq1_u16u12__SVUint16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -58,7 +58,7 @@ svuint16_t test_svzipq1_u16(svuint16_t zn, svuint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq1_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq1_u32u12__SVUint32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -74,7 +74,7 @@ svuint32_t test_svzipq1_u32(svuint32_t zn, svuint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq1_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq1_u64u12__SVUint64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -91,7 +91,7 @@ svuint64_t test_svzipq1_u64(svuint64_t zn, svuint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq1_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq1_s8u10__SVInt8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -107,7 +107,7 @@ svint8_t test_svzipq1_s8(svint8_t zn, svint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq1_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq1_s16u11__SVInt16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -123,7 +123,7 @@ svint16_t test_svzipq1_s16(svint16_t zn, svint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq1_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq1_s32u11__SVInt32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -139,7 +139,7 @@ svint32_t test_svzipq1_s32(svint32_t zn, svint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq1_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq1_s64u11__SVInt64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -156,7 +156,7 @@ svint64_t test_svzipq1_s64(svint64_t zn, svint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svzipq1_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svzipq1_f16u13__SVFloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
@@ -172,7 +172,7 @@ svfloat16_t test_svzipq1_f16(svfloat16_t zn, svfloat16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svzipq1_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svzipq1_f32u13__SVFloat32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
@@ -188,7 +188,7 @@ svfloat32_t test_svzipq1_f32(svfloat32_t zn, svfloat32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svzipq1_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svzipq1_f64u13__SVFloat64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
@@ -204,7 +204,7 @@ svfloat64_t test_svzipq1_f64(svfloat64_t zn, svfloat64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svzipq1_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svzipq1_bf16u14__SVBfloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
index 220352ece1984b6..17800325bb09901 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
@@ -1,14 +1,14 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
@@ -26,7 +26,7 @@
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq2_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq2_u8u11__SVUint8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -42,7 +42,7 @@ svuint8_t test_svzipq2_u8(svuint8_t zn, svuint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq2_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq2_u16u12__SVUint16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -58,7 +58,7 @@ svuint16_t test_svzipq2_u16(svuint16_t zn, svuint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq2_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq2_u32u12__SVUint32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -74,7 +74,7 @@ svuint32_t test_svzipq2_u32(svuint32_t zn, svuint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq2_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq2_u64u12__SVUint64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -91,7 +91,7 @@ svuint64_t test_svzipq2_u64(svuint64_t zn, svuint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq2_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z15test_svzipq2_s8u10__SVInt8_tS_
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
@@ -107,7 +107,7 @@ svint8_t test_svzipq2_s8(svint8_t zn, svint8_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq2_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z16test_svzipq2_s16u11__SVInt16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
@@ -123,7 +123,7 @@ svint16_t test_svzipq2_s16(svint16_t zn, svint16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq2_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z16test_svzipq2_s32u11__SVInt32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
@@ -139,7 +139,7 @@ svint32_t test_svzipq2_s32(svint32_t zn, svint32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq2_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z16test_svzipq2_s64u11__SVInt64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
@@ -156,7 +156,7 @@ svint64_t test_svzipq2_s64(svint64_t zn, svint64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svzipq2_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z16test_svzipq2_f16u13__SVFloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
@@ -172,7 +172,7 @@ svfloat16_t test_svzipq2_f16(svfloat16_t zn, svfloat16_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svzipq2_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z16test_svzipq2_f32u13__SVFloat32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
@@ -188,7 +188,7 @@ svfloat32_t test_svzipq2_f32(svfloat32_t zn, svfloat32_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svzipq2_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z16test_svzipq2_f64u13__SVFloat64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
@@ -204,7 +204,7 @@ svfloat64_t test_svzipq2_f64(svfloat64_t zn, svfloat64_t zm) {
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svzipq2_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z17test_svzipq2_bf16u14__SVBfloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])

>From 7d172a0d14f7471090c4cf56f0f00ef095c820cc Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Mon, 20 Nov 2023 14:42:45 +0000
Subject: [PATCH 3/4] Update zip tests

---
 clang/include/clang/Basic/arm_sve.td          | 12 ++++----
 .../acle_sve2p1_uzpq1.c                       | 28 +++++++++---------
 .../acle_sve2p1_uzpq2.c                       | 29 ++++++++++---------
 .../acle_sve2p1_zipq1.c                       | 28 +++++++++---------
 .../acle_sve2p1_zipq2.c                       | 28 +++++++++---------
 .../acle_sve2p1_imm.cpp                       | 28 ++++++++++++++++++
 6 files changed, 91 insertions(+), 62 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index c377a0b89c1d591..2a3a373c0de316d 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2007,13 +2007,13 @@ let TargetGuard = "sve2p1" in {
   // PMOV
   // Move to Pred
   multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
-    def _LANE : SInst<name # "_lane[_{d}]", "Pdk", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
-    def _ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>;
+    def _LANE      : SInst<name # "_lane[_{d}]", "Pdk", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
+    def _LANE_ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>;
   }
-  defm SVPMOV_B_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_0>;
-  defm SVPMOV_H_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "sUs", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_1>;
-  defm SVPMOV_S_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "iUi", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_3>;
-  defm SVPMOV_D_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "lUl", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_7>;
+  defm SVPMOV_B_TO_PRED : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_0>;
+  defm SVPMOV_H_TO_PRED : PMOV_TO_PRED<"svpmov", "sUs", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_1>;
+  defm SVPMOV_S_TO_PRED : PMOV_TO_PRED<"svpmov", "iUi", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_3>;
+  defm SVPMOV_D_TO_PRED : PMOV_TO_PRED<"svpmov", "lUl", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_7>;
 
   // Move to Vector
   multiclass PMOV_TO_VEC<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
index b0e2e4b9c7a5981..c0fec3951ff58de 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
@@ -15,9 +15,9 @@
 
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
 #else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
 #endif
 
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq1_u8
@@ -33,7 +33,7 @@
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svuint8_t test_svuzpq1_u8(svuint8_t zn, svuint8_t zm) {
-    return svuzpq1_u8(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq1,_u8)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq1_u16
@@ -49,7 +49,7 @@ svuint8_t test_svuzpq1_u8(svuint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 svuint16_t test_svuzpq1_u16(svuint16_t zn, svuint16_t zm) {
-    return svuzpq1_u16(zn, zm);
+  return SVE_ACLE_FUNC(svuzpq1,_u16)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq1_u32
@@ -65,7 +65,7 @@ svuint16_t test_svuzpq1_u16(svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svuint32_t test_svuzpq1_u32(svuint32_t zn, svuint32_t zm) {
-    return svuzpq1_u32(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq1,_u32)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq1_u64
@@ -81,7 +81,7 @@ svuint32_t test_svuzpq1_u32(svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svuint64_t test_svuzpq1_u64(svuint64_t zn, svuint64_t zm) {
-    return svuzpq1_u64(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq1,_u64)(zn, zm);
 }
 
 
@@ -98,7 +98,7 @@ svuint64_t test_svuzpq1_u64(svuint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svint8_t test_svuzpq1_s8(svint8_t zn, svint8_t zm) {
-    return svuzpq1_s8(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq1,_s8)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq1_s16
@@ -114,7 +114,7 @@ svint8_t test_svuzpq1_s8(svint8_t zn, svint8_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 svint16_t test_svuzpq1_s16(svint16_t zn, svint16_t zm) {
-    return svuzpq1_s16(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq1,_s16)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq1_s32
@@ -130,7 +130,7 @@ svint16_t test_svuzpq1_s16(svint16_t zn, svint16_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svint32_t test_svuzpq1_s32(svint32_t zn, svint32_t zm) {
-    return svuzpq1_s32(zn, zm);
+  return SVE_ACLE_FUNC(svuzpq1,_s32)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq1_s64
@@ -146,7 +146,7 @@ svint32_t test_svuzpq1_s32(svint32_t zn, svint32_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svint64_t test_svuzpq1_s64(svint64_t zn, svint64_t zm) {
-    return svuzpq1_s64(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq1,_s64)(zn, zm);
 }
 
 
@@ -163,7 +163,7 @@ svint64_t test_svuzpq1_s64(svint64_t zn, svint64_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_svuzpq1_f16(svfloat16_t zn, svfloat16_t zm) {
-    return svuzpq1_f16(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq1,_f16)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svuzpq1_f32
@@ -179,7 +179,7 @@ svfloat16_t test_svuzpq1_f16(svfloat16_t zn, svfloat16_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_svuzpq1_f32(svfloat32_t zn, svfloat32_t zm) {
-    return svuzpq1_f32(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq1,_f32)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svuzpq1_f64
@@ -195,7 +195,7 @@ svfloat32_t test_svuzpq1_f32(svfloat32_t zn, svfloat32_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svuzpq1_f64(svfloat64_t zn, svfloat64_t zm) {
-    return svuzpq1_f64(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq1,_f64)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svuzpq1_bf16
@@ -211,7 +211,7 @@ svfloat64_t test_svuzpq1_f64(svfloat64_t zn, svfloat64_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
 svbfloat16_t test_svuzpq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
-    return svuzpq1_bf16(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq1,_bf16)(zn, zm);
 }
 
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
index 154b4564f9c6e82..0477ed2c6c28f10 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
@@ -15,9 +15,9 @@
 
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
 #else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
 #endif
 
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq2_u8
@@ -33,7 +33,7 @@
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svuint8_t test_svuzpq2_u8(svuint8_t zn, svuint8_t zm) {
-    return svuzpq2_u8(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq2,_u8)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq2_u16
@@ -49,7 +49,7 @@ svuint8_t test_svuzpq2_u8(svuint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 svuint16_t test_svuzpq2_u16(svuint16_t zn, svuint16_t zm) {
-    return svuzpq2_u16(zn, zm);
+  return SVE_ACLE_FUNC(svuzpq2,_u16)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq2_u32
@@ -65,7 +65,7 @@ svuint16_t test_svuzpq2_u16(svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svuint32_t test_svuzpq2_u32(svuint32_t zn, svuint32_t zm) {
-    return svuzpq2_u32(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq2,_u32)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq2_u64
@@ -81,9 +81,10 @@ svuint32_t test_svuzpq2_u32(svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svuint64_t test_svuzpq2_u64(svuint64_t zn, svuint64_t zm) {
-    return svuzpq2_u64(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq2,_u64)(zn, zm);
 }
 
+
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq2_s8
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
@@ -97,7 +98,7 @@ svuint64_t test_svuzpq2_u64(svuint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svint8_t test_svuzpq2_s8(svint8_t zn, svint8_t zm) {
-    return svuzpq2_s8(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq2,_s8)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svuzpq2_s16
@@ -113,7 +114,7 @@ svint8_t test_svuzpq2_s8(svint8_t zn, svint8_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 svint16_t test_svuzpq2_s16(svint16_t zn, svint16_t zm) {
-    return svuzpq2_s16(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq2,_s16)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svuzpq2_s32
@@ -129,7 +130,7 @@ svint16_t test_svuzpq2_s16(svint16_t zn, svint16_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svint32_t test_svuzpq2_s32(svint32_t zn, svint32_t zm) {
-    return svuzpq2_s32(zn, zm);
+  return SVE_ACLE_FUNC(svuzpq2,_s32)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svuzpq2_s64
@@ -145,7 +146,7 @@ svint32_t test_svuzpq2_s32(svint32_t zn, svint32_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svint64_t test_svuzpq2_s64(svint64_t zn, svint64_t zm) {
-    return svuzpq2_s64(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq2,_s64)(zn, zm);
 }
 
 
@@ -162,7 +163,7 @@ svint64_t test_svuzpq2_s64(svint64_t zn, svint64_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_svuzpq2_f16(svfloat16_t zn, svfloat16_t zm) {
-    return svuzpq2_f16(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq2,_f16)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svuzpq2_f32
@@ -178,7 +179,7 @@ svfloat16_t test_svuzpq2_f16(svfloat16_t zn, svfloat16_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_svuzpq2_f32(svfloat32_t zn, svfloat32_t zm) {
-    return svuzpq2_f32(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq2,_f32)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svuzpq2_f64
@@ -194,7 +195,7 @@ svfloat32_t test_svuzpq2_f32(svfloat32_t zn, svfloat32_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svuzpq2_f64(svfloat64_t zn, svfloat64_t zm) {
-    return svuzpq2_f64(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq2,_f64)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svuzpq2_bf16
@@ -210,7 +211,7 @@ svfloat64_t test_svuzpq2_f64(svfloat64_t zn, svfloat64_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
 svbfloat16_t test_svuzpq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
-    return svuzpq2_bf16(zn, zm);
+    return SVE_ACLE_FUNC(svuzpq2,_bf16)(zn, zm);
 }
 
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
index d96c7eda374d896..06297651471ffd5 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
@@ -15,9 +15,9 @@
 
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
 #else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
 #endif
 
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq1_u8
@@ -33,7 +33,7 @@
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svuint8_t test_svzipq1_u8(svuint8_t zn, svuint8_t zm) {
-    return svzipq1_u8(zn, zm);
+    return SVE_ACLE_FUNC(svzipq1,_u8)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq1_u16
@@ -49,7 +49,7 @@ svuint8_t test_svzipq1_u8(svuint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 svuint16_t test_svzipq1_u16(svuint16_t zn, svuint16_t zm) {
-    return svzipq1_u16(zn, zm);
+  return SVE_ACLE_FUNC(svzipq1,_u16)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq1_u32
@@ -65,7 +65,7 @@ svuint16_t test_svzipq1_u16(svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svuint32_t test_svzipq1_u32(svuint32_t zn, svuint32_t zm) {
-    return svzipq1_u32(zn, zm);
+    return SVE_ACLE_FUNC(svzipq1,_u32)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq1_u64
@@ -81,7 +81,7 @@ svuint32_t test_svzipq1_u32(svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svuint64_t test_svzipq1_u64(svuint64_t zn, svuint64_t zm) {
-    return svzipq1_u64(zn, zm);
+    return SVE_ACLE_FUNC(svzipq1,_u64)(zn, zm);
 }
 
 
@@ -98,7 +98,7 @@ svuint64_t test_svzipq1_u64(svuint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svint8_t test_svzipq1_s8(svint8_t zn, svint8_t zm) {
-    return svzipq1_s8(zn, zm);
+    return SVE_ACLE_FUNC(svzipq1,_s8)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq1_s16
@@ -114,7 +114,7 @@ svint8_t test_svzipq1_s8(svint8_t zn, svint8_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 svint16_t test_svzipq1_s16(svint16_t zn, svint16_t zm) {
-    return svzipq1_s16(zn, zm);
+    return SVE_ACLE_FUNC(svzipq1,_s16)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq1_s32
@@ -130,7 +130,7 @@ svint16_t test_svzipq1_s16(svint16_t zn, svint16_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svint32_t test_svzipq1_s32(svint32_t zn, svint32_t zm) {
-    return svzipq1_s32(zn, zm);
+  return SVE_ACLE_FUNC(svzipq1,_s32)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq1_s64
@@ -146,7 +146,7 @@ svint32_t test_svzipq1_s32(svint32_t zn, svint32_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svint64_t test_svzipq1_s64(svint64_t zn, svint64_t zm) {
-    return svzipq1_s64(zn, zm);
+    return SVE_ACLE_FUNC(svzipq1,_s64)(zn, zm);
 }
 
 
@@ -163,7 +163,7 @@ svint64_t test_svzipq1_s64(svint64_t zn, svint64_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_svzipq1_f16(svfloat16_t zn, svfloat16_t zm) {
-    return svzipq1_f16(zn, zm);
+    return SVE_ACLE_FUNC(svzipq1,_f16)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svzipq1_f32
@@ -179,7 +179,7 @@ svfloat16_t test_svzipq1_f16(svfloat16_t zn, svfloat16_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_svzipq1_f32(svfloat32_t zn, svfloat32_t zm) {
-    return svzipq1_f32(zn, zm);
+    return SVE_ACLE_FUNC(svzipq1,_f32)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svzipq1_f64
@@ -195,7 +195,7 @@ svfloat32_t test_svzipq1_f32(svfloat32_t zn, svfloat32_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svzipq1_f64(svfloat64_t zn, svfloat64_t zm) {
-    return svzipq1_f64(zn, zm);
+    return SVE_ACLE_FUNC(svzipq1,_f64)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svzipq1_bf16
@@ -211,7 +211,7 @@ svfloat64_t test_svzipq1_f64(svfloat64_t zn, svfloat64_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
 svbfloat16_t test_svzipq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
-    return svzipq1_bf16(zn, zm);
+    return SVE_ACLE_FUNC(svzipq1,_bf16)(zn, zm);
 }
 
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
index 17800325bb09901..04cb6c69de6c79a 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
@@ -15,9 +15,9 @@
 
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
 #else
-#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
 #endif
 
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq2_u8
@@ -33,7 +33,7 @@
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svuint8_t test_svzipq2_u8(svuint8_t zn, svuint8_t zm) {
-    return svzipq2_u8(zn, zm);
+    return SVE_ACLE_FUNC(svzipq2,_u8)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq2_u16
@@ -49,7 +49,7 @@ svuint8_t test_svzipq2_u8(svuint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 svuint16_t test_svzipq2_u16(svuint16_t zn, svuint16_t zm) {
-    return svzipq2_u16(zn, zm);
+  return SVE_ACLE_FUNC(svzipq2,_u16)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq2_u32
@@ -65,7 +65,7 @@ svuint16_t test_svzipq2_u16(svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svuint32_t test_svzipq2_u32(svuint32_t zn, svuint32_t zm) {
-    return svzipq2_u32(zn, zm);
+    return SVE_ACLE_FUNC(svzipq2,_u32)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq2_u64
@@ -81,7 +81,7 @@ svuint32_t test_svzipq2_u32(svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svuint64_t test_svzipq2_u64(svuint64_t zn, svuint64_t zm) {
-    return svzipq2_u64(zn, zm);
+    return SVE_ACLE_FUNC(svzipq2,_u64)(zn, zm);
 }
 
 
@@ -98,7 +98,7 @@ svuint64_t test_svzipq2_u64(svuint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svint8_t test_svzipq2_s8(svint8_t zn, svint8_t zm) {
-    return svzipq2_s8(zn, zm);
+    return SVE_ACLE_FUNC(svzipq2,_s8)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svzipq2_s16
@@ -114,7 +114,7 @@ svint8_t test_svzipq2_s8(svint8_t zn, svint8_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 svint16_t test_svzipq2_s16(svint16_t zn, svint16_t zm) {
-    return svzipq2_s16(zn, zm);
+    return SVE_ACLE_FUNC(svzipq2,_s16)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svzipq2_s32
@@ -130,7 +130,7 @@ svint16_t test_svzipq2_s16(svint16_t zn, svint16_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svint32_t test_svzipq2_s32(svint32_t zn, svint32_t zm) {
-    return svzipq2_s32(zn, zm);
+  return SVE_ACLE_FUNC(svzipq2,_s32)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svzipq2_s64
@@ -146,7 +146,7 @@ svint32_t test_svzipq2_s32(svint32_t zn, svint32_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svint64_t test_svzipq2_s64(svint64_t zn, svint64_t zm) {
-    return svzipq2_s64(zn, zm);
+    return SVE_ACLE_FUNC(svzipq2,_s64)(zn, zm);
 }
 
 
@@ -163,7 +163,7 @@ svint64_t test_svzipq2_s64(svint64_t zn, svint64_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_svzipq2_f16(svfloat16_t zn, svfloat16_t zm) {
-    return svzipq2_f16(zn, zm);
+    return SVE_ACLE_FUNC(svzipq2,_f16)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svzipq2_f32
@@ -179,7 +179,7 @@ svfloat16_t test_svzipq2_f16(svfloat16_t zn, svfloat16_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_svzipq2_f32(svfloat32_t zn, svfloat32_t zm) {
-    return svzipq2_f32(zn, zm);
+    return SVE_ACLE_FUNC(svzipq2,_f32)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svzipq2_f64
@@ -195,7 +195,7 @@ svfloat32_t test_svzipq2_f32(svfloat32_t zn, svfloat32_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svzipq2_f64(svfloat64_t zn, svfloat64_t zm) {
-    return svzipq2_f64(zn, zm);
+    return SVE_ACLE_FUNC(svzipq2,_f64)(zn, zm);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svzipq2_bf16
@@ -211,7 +211,7 @@ svfloat64_t test_svzipq2_f64(svfloat64_t zn, svfloat64_t zm) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
 svbfloat16_t test_svzipq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
-    return svzipq2_bf16(zn, zm);
+    return SVE_ACLE_FUNC(svzipq2,_bf16)(zn, zm);
 }
 
 
diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
index 30d60cfe205e7e0..84fdba432c24497 100644
--- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
+++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
@@ -122,3 +122,31 @@ void test_svextq_lane(svint16_t zn_i16, svint16_t zm_i16, svfloat16_t zn_f16, sv
   svextq_lane_s16(zn_i16, zm_i16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
   svextq_lane_f16(zn_f16, zm_f16, 16);  // expected-error {{argument value 16 is outside the valid range [0, 15]}}
 }
+
+__attribute__((target("+sve2p1")))
+void test_svpmov_lane(){
+  svuint8_t zn_u8;
+  svuint16_t zn_u16;
+  svuint32_t zn_u32;
+  svuint64_t zn_u64;
+  svbool_t pn;
+
+  svpmov_lane_u8(zn_u8, -1); // expected-error {{argument value -1 is outside the valid range [0, 0]}}
+  svpmov_lane_u16(zn_u16, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
+  svpmov_lane_u32(zn_u32, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+  svpmov_lane_u64(zn_u64, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+
+  svpmov_lane_u8(zn_u8, 1); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+  svpmov_lane_u16(zn_u16, 3); // expected-error {{argument value 3 is outside the valid range [0, 1]}}
+  svpmov_lane_u32(zn_u32, 5); // expected-error {{argument value 5 is outside the valid range [0, 3]}}
+  svpmov_lane_u64(zn_u64, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+
+
+  zn_u16 = svpmov_lane_u16_m(zn_u16, pn, 0); // expected-error {{argument value 0 is outside the valid range [1, 1]}}
+  zn_u32 = svpmov_lane_u32_m(zn_u32, pn, 0); // expected-error {{argument value 0 is outside the valid range [1, 3]}}
+  zn_u64 = svpmov_lane_u64_m(zn_u64, pn, 0); // expected-error {{argument value 0 is outside the valid range [1, 7]}}
+
+  zn_u16 = svpmov_lane_u16_m(zn_u16, pn, 3); // expected-error {{argument value 3 is outside the valid range [1, 1]}}
+  zn_u32 = svpmov_lane_u32_m(zn_u32, pn, 5); // expected-error {{argument value 5 is outside the valid range [1, 3]}}
+  zn_u64 = svpmov_lane_u64_m(zn_u64, pn, 8); // expected-error {{argument value 8 is outside the valid range [1, 7]}}
+}

>From 54bc8bb0d11bbeab77fef494334fce0f90a17882 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Mon, 20 Nov 2023 17:45:02 +0000
Subject: [PATCH 4/4] Fix pmov to take the lane immediate as a uint64_t constant

---
 clang/include/clang/Basic/arm_sve.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 2a3a373c0de316d..59dc10c884b19e2 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2007,7 +2007,7 @@ let TargetGuard = "sve2p1" in {
   // PMOV
   // Move to Pred
   multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
-    def _LANE      : SInst<name # "_lane[_{d}]", "Pdk", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
+    def _LANE      : Inst<name # "_lane[_{d}]", "Pdi", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
     def _LANE_ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>;
   }
   defm SVPMOV_B_TO_PRED : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_0>;
@@ -2017,7 +2017,7 @@ let TargetGuard = "sve2p1" in {
 
   // Move to Vector
   multiclass PMOV_TO_VEC<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
-    def _M : SInst<name # "_lane[_{d}]", "ddPk", types, MergeOp1, intrinsic # "_merging", flags, [ImmCheck<2, immCh>]>;
+    def _M : SInst<name # "_lane[_{d}]", "ddPi", types, MergeOp1, intrinsic # "_merging", flags, [ImmCheck<2, immCh>]>;
     def _Z : SInst<name # "_{d}_z", "dP",  types, MergeNone, intrinsic # "_zeroing", flags, []>;
   }
   def SVPMOV_TO_VEC_LANE_B : SInst<"svpmov_{d}_z", "dP",  "cUc", MergeNone, "aarch64_sve_pmov_to_vector_lane_zeroing", [], []>;



More information about the cfe-commits mailing list